[MLIR] Add broadcasting support for element wise operations (#398)
* Add broadcasting support for elementwise operations
* Remove MLIRDialect from MLIRWholeArchiveLibs
* Rewrite getLoopIVsForBroadcasting
* Compute dimensions for allocating result memory
* Compute dimensions for allocating result memory (revised)
* Use static dimension for element-wise operation testcases
* Add a test for addition with broadcasting
* Missed Traits.h when merging
* Revise
* Update SharedWork.md
* Broadcasting for variadic operations
* Edit comments
* Update SharedWork.md
* Reorganize the code
* Add CHECK-LABEL for test_add_with_broadcasting
parent 0a8af69e94
commit 06a968d4a1
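Note (illustrative, not part of the patch): the element-wise ops touched here follow ONNX/NumPy-style multidirectional broadcasting, which is what `getBroadcastedType` computes during shape inference. A minimal C++ sketch of that shape rule, with hypothetical names and a simplified treatment of dynamic (-1) dimensions:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch of NumPy/ONNX-style multidirectional broadcasting:
// shapes are aligned at the rightmost dimension; a size of 1 (or a missing
// dimension) stretches to match the other operand. -1 stands for a dynamic
// dimension, mirroring MLIR's convention; its handling here is simplified.
std::vector<int64_t> broadcastShape(const std::vector<int64_t> &a,
                                    const std::vector<int64_t> &b) {
  size_t rank = std::max(a.size(), b.size());
  std::vector<int64_t> result(rank);
  for (size_t r = 0; r < rank; ++r) { // r counts from the right
    int64_t da = r < a.size() ? a[a.size() - 1 - r] : 1;
    int64_t db = r < b.size() ? b[b.size() - 1 - r] : 1;
    int64_t out = -1;
    if (da == db)
      out = da; // equal sizes (possibly both dynamic)
    else if (da == 1)
      out = db; // a broadcasts along this dimension
    else if (db == 1)
      out = da; // b broadcasts along this dimension
    else if (da == -1 || db == -1)
      out = -1; // conservatively dynamic in this sketch
    else
      assert(false && "incompatible shapes");
    result[rank - 1 - r] = out;
  }
  return result;
}

// Example: broadcastShape({10}, {-1, 10}) yields {-1, 10}, matching the
// test_add_with_broadcasting case (tensor<?xf32> + tensor<?x10xf32>).
```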
@@ -57,6 +57,7 @@ endfunction(find_mlir_lib)
find_mlir_lib(MLIRAffineOps)
find_mlir_lib(MLIRAffineToStandard)
find_mlir_lib(MLIRAnalysis)
+find_mlir_lib(MLIRDialect)
find_mlir_lib(MLIRExecutionEngine)
find_mlir_lib(MLIRIR)
find_mlir_lib(MLIRLLVMIR)
@@ -114,6 +115,7 @@ set(MLIRLibsOnce
        MLIRAffineOps
        MLIRAffineToStandard
        MLIRAnalysis
+        MLIRDialect
        MLIRExecutionEngine
        MLIRIR
        MLIRLLVMIR
@@ -8,10 +8,10 @@ ONNX operations for which some work is needed.

| ONNX Oper   | Person working on it  | ONNX 2 KRNL    | Basic functionality   | Extended functionality (e.g. broadcast)  |
| ----------  | --------------------- | -------------- | --------------------- | ---------------------------------------- |
-| Add         | Tung (updated)        | v              | v                     | noM                                      |
-| And         | Tung                  | v              | v                     | noM                                      |
-| Cosh        | Tung                  | v              | v                     | noM                                      |
-| Div         | Tung                  | v              | v                     |                                          |
+| Add         | Tung (updated)        | v              | v                     | M                                        |
+| And         | Tung                  | v              | v                     | M                                        |
+| Cosh        | Tung                  | v              | v                     |                                          |
+| Div         | Tung                  | v              | v                     | M                                        |
| Elu         | Tung                  | v              | v                     |                                          |
| Exp         | Tung                  | v              | v                     |                                          |
| FullGemm    |                       |                |                       | noU                                      |
@@ -19,18 +19,18 @@ ONNX operations for which some work is needed.
| HardSigmoid | Tung                  | v              | v                     |                                          |
| LeakyRelu   | Tung                  | v              | v                     |                                          |
| MatMul      |                       |                |                       | noM                                      |
-| Max         | Tung                  | v              | v                     | noM                                      |
-| Min         | Tung                  | v              | v                     | noM                                      |
-| Mul         | Tung                  | v              | v                     | noM                                      |
-| Or          | Tung                  | v              | v                     | noM                                      |
+| Max         | Tung                  | v              | v                     | M                                        |
+| Min         | Tung                  | v              | v                     | M                                        |
+| Mul         | Tung                  | v              | v                     | M                                        |
+| Or          | Tung                  | v              | v                     | M                                        |
| Relu        | Tung                  | v              | v                     |                                          |
| Selu        | Tung                  | v              | v                     |                                          |
| Sigmoid     | Tung                  | v              | v                     |                                          |
| Sinh        | Tung                  | v              | v                     |                                          |
-| Sub         | Tung                  | v              | v                     | noM                                      |
-| Sum         | Tung                  | v              | v                     | noM                                      |
+| Sub         | Tung                  | v              | v                     | M                                        |
+| Sum         | Tung                  | v              | v                     | M                                        |
| Tanh        | Tung                  | v              | v                     |                                          |
-| Xor         | Tung                  | v              | v                     | noM                                      |
+| Xor         | Tung                  | v              | v                     | M                                        |


ONNX operations for which the work is completed (full functionality) and tested
@@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//

+#include "mlir/Dialect/Traits.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
@@ -21,6 +22,7 @@
#include "onnx_ops.hpp"

using namespace mlir;
+using namespace mlir::OpTrait::util;

//===----------------------------------------------------------------------===//
// ONNXOpsDialect
@@ -127,7 +129,12 @@ void ONNXReciprocalOp::inferShapes() {
/// Infer the output shape of the ONNXAddOp. This method is required by the
/// shape inference interface.
void ONNXAddOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
@@ -135,7 +142,12 @@ void ONNXAddOp::inferShapes() {
/// Infer the output shape of the ONNXMulOp. This method is required by the
/// shape inference interface.
void ONNXMulOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
@@ -143,7 +155,12 @@ void ONNXMulOp::inferShapes() {
/// Infer the output shape of the ONNXDivOp. This method is required by the
/// shape inference interface.
void ONNXDivOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
@@ -151,7 +168,12 @@ void ONNXDivOp::inferShapes() {
/// Infer the output shape of the ONNXSubOp. This method is required by the
/// shape inference interface.
void ONNXSubOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
@@ -159,21 +181,38 @@ void ONNXSubOp::inferShapes() {
/// Infer the output shape of the ONNXAndOp. This method is required by the
/// shape inference interface.
void ONNXAndOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
// Or
/// Infer the output shape of the ONNXOrOp. This method is required by the
/// shape inference interface.
-void ONNXOrOp::inferShapes() { getResult()->setType(getOperand(0)->getType()); }
+void ONNXOrOp::inferShapes() {
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
+}

//===----------------------------------------------------------------------===//
// Xor
/// Infer the output shape of the ONNXXorOp. This method is required by the
/// shape inference interface.
void ONNXXorOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
}

//===----------------------------------------------------------------------===//
@@ -183,7 +222,16 @@ void ONNXXorOp::inferShapes() {
/// Infer the output shape of the ONNXSumOp. This method is required by the
/// shape inference interface.
void ONNXSumOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
}

//===----------------------------------------------------------------------===//
@@ -191,7 +239,16 @@ void ONNXSumOp::inferShapes() {
/// Infer the output shape of the ONNXMaxOp. This method is required by the
/// shape inference interface.
void ONNXMaxOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
}

//===----------------------------------------------------------------------===//
@@ -199,7 +256,16 @@ void ONNXMaxOp::inferShapes() {
/// Infer the output shape of the ONNXMinOp. This method is required by the
/// shape inference interface.
void ONNXMinOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
}

//===----------------------------------------------------------------------===//
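For the variadic ops above (Sum, Max, Min), the result type is obtained by folding the pairwise broadcast rule over all operands. A hedged compile-time sketch of that fold, reusing the hypothetical `broadcastShape` helper from the earlier note:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Pairwise rule sketched in the earlier note (hypothetical helper).
std::vector<int64_t> broadcastShape(const std::vector<int64_t> &a,
                                    const std::vector<int64_t> &b);

// Fold the pairwise rule over all operands, mirroring how the variadic
// ONNXSumOp/ONNXMaxOp/ONNXMinOp::inferShapes() fold getBroadcastedType.
std::vector<int64_t>
broadcastShapeVariadic(const std::vector<std::vector<int64_t>> &shapes) {
  std::vector<int64_t> result = shapes.front();
  for (size_t i = 1; i < shapes.size(); ++i)
    result = broadcastShape(result, shapes[i]);
  return result;
}

// Example: {{2, 1, 5}, {3, 5}, {1}} folds to {2, 3, 5}.
```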
@@ -9,6 +9,8 @@
//
//===----------------------------------------------------------------------===//

+#include <map>
+
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Sequence.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
@@ -44,16 +46,51 @@ static MemRefType convertTensorToMemRef(TensorType type) {
}

/// Insert an allocation and deallocation for the given MemRefType.
-static Value* insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter& rewriter, bool insertDealloc, Value* oldMemRef = nullptr) {
+static Value *insertAllocAndDealloc(MemRefType type, Location loc,
+                                    PatternRewriter &rewriter,
+                                    bool insertDealloc,
+                                    ArrayRef<Value *> operands = {}) {
  // Put together alloc operands for any dynamic dimensions of the memref.
  AllocOp alloc;
-  if (oldMemRef) {
-    SmallVector<Value*, 4> allocOperands;
+  if (!operands.empty()) {
    auto memRefShape = type.getShape();
-    for (int i = 0; i < memRefShape.size(); ++i)
+    auto rank = memRefShape.size();
+
+    std::map<int, Value *> fromOperands;
+    for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+      int memRefDimIdx = rank - 1 - reversedIdx;
+      if (memRefShape[memRefDimIdx] < 0) { // unknown dimension
+        Value *maxDim = nullptr;
+        for (int i = 0; i < operands.size(); i++) {
+          auto operandShape =
+              operands[i]->getType().cast<MemRefType>().getShape();
+          int operandDimIdx = operandShape.size() - 1 - reversedIdx;
+
+          if (operandDimIdx < 0)
+            continue;
+
+          // In case of operations with broadcasting, the dimension of the
+          // alloc result is the maximum size along each dimension of the
+          // operands.
+          auto operandDim =
+              rewriter.create<DimOp>(loc, operands[i], operandDimIdx);
+          if (maxDim) {
+            auto maxCondition = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt,
+                                                        operandDim, maxDim);
+            maxDim = rewriter.create<SelectOp>(loc, maxCondition, operandDim,
+                                               maxDim);
+          } else {
+            maxDim = operandDim;
+          }
+        }
+        fromOperands.insert(std::make_pair(memRefDimIdx, maxDim));
+      }
+    }
+
+    SmallVector<Value *, 4> allocOperands;
+    for (int i = 0; i < rank; ++i)
      if (memRefShape[i] < 0)
-        allocOperands.push_back(rewriter.create<DimOp>(loc, oldMemRef, i));
+        allocOperands.push_back(fromOperands[i]);
    alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
  } else {
    alloc = rewriter.create<AllocOp>(loc, type);
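The reworked `insertAllocAndDealloc` above sizes each unknown result dimension as the run-time maximum of the operands' sizes along that dimension, aligning dimensions from the right. A small, hypothetical compile-time analogue of that alignment and maximum, assuming statically known sizes (the patch itself emits DimOp, CmpIOp, and SelectOp to do this at run time):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// For one result dimension, counted from the right, take the largest
// corresponding operand size; operands of smaller rank simply do not
// participate. This is a static stand-in for the per-dimension
// DimOp / CmpIOp(sgt) / SelectOp chain the patch emits at run time.
int64_t maxDimFromRight(const std::vector<std::vector<int64_t>> &operandShapes,
                        int reversedIdx) {
  int64_t maxDim = 1; // sizes are at least 1
  for (const auto &shape : operandShapes) {
    int operandDimIdx = static_cast<int>(shape.size()) - 1 - reversedIdx;
    if (operandDimIdx < 0)
      continue; // this operand has no dimension at this position
    maxDim = std::max(maxDim, shape[operandDimIdx]);
  }
  return maxDim;
}
```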
@@ -109,6 +146,89 @@ unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
  return llvm::divideCeil(sizeInBits, 8);
}

+// Get run-time dimension information for unknown dimensions used for
+// broadcasting.
+std::map<int, std::map<int, Value *> >
+getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
+                      MemRefType memRefType, ArrayRef<Value *> operands) {
+  auto memRefShape = memRefType.getShape();
+  int64_t rank = memRefShape.size();
+  // For unknown dimensions, we need to get dimension values at runtime in
+  // order to do broadcasting.
+  std::map<int, std::map<int, Value *>> DimInfo;
+  // For each result dimension, compute the number of sharing operands.
+  // Sharing operands are operands sharing the same index (counting from the
+  // rightmost to the leftmost) for a given dimension.
+  std::map<int, int> sharedDimCount;
+  for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    int dimIdx = rank - 1 - reversedIdx;
+    sharedDimCount[dimIdx] = 0;
+    for (int i = 0; i < operands.size(); ++i) {
+      auto shape = operands[i]->getType().cast<MemRefType>().getShape();
+      if (reversedIdx <= shape.size() - 1)
+        sharedDimCount[dimIdx]++;
+    }
+  }
+  // An unknown dimension can have a value of 1 or N (N > 1).
+  // If its value is 1, it is broadcasted dimension.
+  // Otherwise, non-broadcasted dimension.
+  // We only care about unknown dimensions whose number of sharing operands is
+  // more than one, since they are potentially broadcasted dimensions.
+  for (int i = 0; i < operands.size(); ++i) {
+    std::map<int, Value *> broadcastedDims;
+    auto shape = operands[i]->getType().cast<MemRefType>().getShape();
+    int size = shape.size();
+    for (int j = 0; j < shape.size(); ++j) {
+      if (shape[j] < 0 and sharedDimCount[rank - size + j] > 1) {
+        auto dim = rewriter.create<DimOp>(loc, operands[i], j).getResult();
+        auto one = rewriter.create<ConstantIndexOp>(loc, 1);
+        auto isBroadcasted =
+            rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
+        broadcastedDims.insert(std::make_pair(j, isBroadcasted));
+      }
+    }
+    DimInfo.insert(std::make_pair(i, broadcastedDims));
+  }
+  return DimInfo;
+}
+
+// Extract induction variables that are used for broadcasting values of a
+// given operand.
+std::vector<Value *>
+getLoopIVsForBroadcasting(Location loc, ConversionPatternRewriter &rewriter,
+                           ArrayRef<Value *> loopIVs, Value *operand,
+                           std::map<int, Value *> broadcastedDims) {
+  // `operand` must has a ranked type. This should have been checked by the
+  // shape inference pass.
+  auto operandShape = operand->getType().cast<MemRefType>().getShape();
+  auto rank = operandShape.size();
+  auto loopCount = loopIVs.size();
+
+  std::vector<Value*> newLoopIVs;
+  for (unsigned reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    auto dimIdx = rank - 1 - reversedIdx;
+    auto loopIdx = loopCount - 1 - reversedIdx;
+    if (operandShape[dimIdx] == 1) {
+      // Broadcasted dimension
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      newLoopIVs.insert(newLoopIVs.begin(), zero);
+    } else if ((operandShape[dimIdx] == -1) &&
+               (broadcastedDims.find(dimIdx) != broadcastedDims.end())) {
+      // Unknown dimension, it can have a value of 1 or N (N > 1).
+      // If its value is 1, it is broadcasted dimension.
+      // Otherwise, non-broadcasted dimension.
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      auto idx = rewriter.create<SelectOp>(loc, broadcastedDims[dimIdx],
+                                           zero, loopIVs[loopIdx]);
+      newLoopIVs.insert(newLoopIVs.begin(), idx);
+    } else {
+      // Non-broadcasted dimension
+      newLoopIVs.insert(newLoopIVs.begin(), loopIVs[loopIdx]);
+    }
+  }
+  return newLoopIVs;
+}
+
namespace {

template <typename ElementwiseNaryOp>
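`getLoopIVsForBroadcasting` above rewrites the output loop indices into per-operand access indices: a static size-1 dimension always reads index 0, a dynamic dimension selects between 0 and the loop index at run time, and any other dimension uses the loop index directly. A hypothetical compile-time sketch of the same mapping, with the run-time SelectOp modeled as a boolean flag:

```cpp
#include <cstdint>
#include <map>
#include <vector>

// Map output loop indices to access indices for one operand.
// operandShape uses -1 for dynamic sizes; isOne[dim] says whether a dynamic
// dimension turned out to be 1 at run time (the lowered code decides this
// with CmpIOp/SelectOp instead of a flag).
std::vector<int64_t> mapLoopIVs(const std::vector<int64_t> &loopIVs,
                                const std::vector<int64_t> &operandShape,
                                const std::map<int, bool> &isOne) {
  int rank = static_cast<int>(operandShape.size());
  int loopCount = static_cast<int>(loopIVs.size());
  std::vector<int64_t> ivs(rank);
  for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
    int dimIdx = rank - 1 - reversedIdx;
    int loopIdx = loopCount - 1 - reversedIdx;
    if (operandShape[dimIdx] == 1) {
      ivs[dimIdx] = 0; // statically broadcast: always read element 0
    } else if (operandShape[dimIdx] == -1 && isOne.count(dimIdx)) {
      // Dynamic size: broadcast only if it is 1 at run time.
      ivs[dimIdx] = isOne.at(dimIdx) ? 0 : loopIVs[loopIdx];
    } else {
      ivs[dimIdx] = loopIVs[loopIdx]; // not broadcast: use the loop index
    }
  }
  return ivs;
}
```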
@@ -505,7 +625,7 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
    else
      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, operands[0]);
+          memRefType, loc, rewriter, insertDealloc, {operands[0]});

    // Number of loops
    auto memRefShape = memRefType.getShape();
@@ -595,20 +715,18 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
    // Insert an allocation and deallocation for the result of this operation.
    auto memRefType = convertTensorToMemRef(tensorType);

-    // If the output has a dynamic dimension, pass the operands required for
-    // each dynamic dimension to the AllocOp. The first operand of the
-    // operation is used. The operands of the op need to match in terms of
-    // dimensions with the result at this pre-optimization phase.
-    // TODO: verify that dimensions match.
-    // TODO: can the dimension of the result differ after optimizations?
    Value* alloc;
    bool insertDealloc = checkInsertDealloc(op);
-
+    // If the output has a dynamic dimension, we compute its dimension at
+    // runtime by using dimensions from the operands.
+    // In particular, we need to know from which operand a result dimension
+    // comes from.
+    // TODO: can the dimension of the result differ after optimizations?
    if (hasAllConstantDimensions(memRefType))
      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
    else
      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, operands[0]);
+          memRefType, loc, rewriter, insertDealloc, operands);

    // Number of loops
    auto memRefShape = memRefType.getShape();
@@ -639,13 +757,18 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
      if (memRefShape[i] < 0) {
        pack.pushConstantBound(0);
        pack.pushOperandBound(
-            rewriter.create<DimOp>(loc, operands[0], i).getResult());
+            rewriter.create<DimOp>(loc, alloc, i).getResult());
      } else {
        pack.pushConstantBound(0);
        pack.pushConstantBound(memRefShape[i]);
      }
    }

+    // Get run-time dimension information for unknown dimensions used for
+    // broadcasting.
+    std::map<int, std::map<int, Value *>> broadcastedDimInfo =
+        getBroadcastedDimInfo(loc, rewriter, memRefType, operands);
+
    auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
    Block& iterationBlock = iterateOp.bodyRegion().front();

@@ -655,8 +778,7 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
    // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
    rewriter.setInsertionPointToEnd(&optimizationBlock);
    // Return from KrnlOptimizeLoopsOp body.
-    // When no optimizations are present we just return the loops
-    // unchaged.
+    // When no optimizations are present we just return the loops unchaged.
    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
    rewriter.setInsertionPoint(optimizedLoopsOp);

@@ -670,9 +792,13 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {

    // Fold over operands for each of their scalar values
    Value *accumulated, *next;
-    accumulated = rewriter.create<LoadOp>(loc, operands[0], loopIVs);
+    auto accumulatedLoopIVs = getLoopIVsForBroadcasting(
+        loc, rewriter, loopIVs, operands[0], broadcastedDimInfo[0]);
+    accumulated = rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs);
    for (unsigned i = 1; i < numArgs; i++) {
-      next = rewriter.create<LoadOp>(loc, operands[i], loopIVs);
+      auto nextLoopIVs = getLoopIVsForBroadcasting(
+          loc, rewriter, loopIVs, operands[i], broadcastedDimInfo[i]);
+      next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs);
      accumulated = mapToLowerScalarOp<ElementwiseVariadicOp>(
          op, memRefType.getElementType(), {accumulated, next}, rewriter);
    }
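Taken together, the loop nest generated by this pattern behaves like the following illustrative reference routine for a broadcasted binary Add over row-major buffers; names and the helper structure are assumptions for exposition, not code from the patch:

```cpp
#include <cstdint>
#include <vector>

// Illustrative reference semantics for a broadcasted binary Add over
// row-major buffers, mirroring the generated krnl loop nest: iterate over
// the output shape and, per operand, replace the loop index with 0 along
// broadcast (size-1 or missing) dimensions.
std::vector<float> referenceAdd(const std::vector<float> &a,
                                const std::vector<int64_t> &shapeA,
                                const std::vector<float> &b,
                                const std::vector<int64_t> &shapeB,
                                const std::vector<int64_t> &shapeOut) {
  auto flatten = [](const std::vector<int64_t> &idx,
                    const std::vector<int64_t> &shape) {
    int64_t off = 0;
    for (size_t i = 0; i < shape.size(); ++i)
      off = off * shape[i] + idx[i];
    return off;
  };
  auto project = [](const std::vector<int64_t> &outIdx,
                    const std::vector<int64_t> &shape) {
    // Align from the right; size-1 dimensions read index 0 (broadcast).
    std::vector<int64_t> idx(shape.size());
    for (size_t r = 0; r < shape.size(); ++r) {
      size_t outPos = outIdx.size() - 1 - r;
      size_t pos = shape.size() - 1 - r;
      idx[pos] = (shape[pos] == 1) ? 0 : outIdx[outPos];
    }
    return idx;
  };
  int64_t total = 1;
  for (int64_t d : shapeOut)
    total *= d;
  std::vector<float> out(total);
  std::vector<int64_t> outIdx(shapeOut.size(), 0);
  for (int64_t linear = 0; linear < total; ++linear) {
    // Decode the linear index into multi-dimensional output indices.
    int64_t rem = linear;
    for (int i = static_cast<int>(shapeOut.size()) - 1; i >= 0; --i) {
      outIdx[i] = rem % shapeOut[i];
      rem /= shapeOut[i];
    }
    out[linear] = a[flatten(project(outIdx, shapeA), shapeA)] +
                  b[flatten(project(outIdx, shapeB), shapeB)];
  }
  return out;
}

// Example: referenceAdd(a, {10}, b, {3, 10}, {3, 10}) adds a 1-D vector to
// every row of a 3x10 buffer, as in the test_add_with_broadcasting case.
```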
@@ -1,143 +1,129 @@
// RUN: onnf-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

-func @test_add(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_add
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_mul(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_mul
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_div(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_div
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_sub(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_sub
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_and(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.And"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_and(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.And"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%0) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_and
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
}

-func @test_or(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_or(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%0) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_or
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
}

-func @test_xor(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_xor(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%0) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_xor
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
}

func @test_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@@ -310,66 +296,60 @@ func @test_reshape(%arg0 : tensor<?x10xf32>, %arg1 : tensor<4xi32>) -> tensor<*x
  // CHECK: return [[ALLOC]] : memref<?x?x?x?xf32>
}

-func @test_sum(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_sum
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_max(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_max
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

-func @test_min(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_min
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
}

func @test_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@@ -495,3 +475,29 @@ func @test_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
  // CHECK: store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
  // CHECK: return [[RES]] : memref<?x10xf32>
}
+
+func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<?xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: test_add_with_broadcasting
+  // CHECK: [[DIM1:%.+]] = dim %arg1, 0 : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc([[DIM1]]) : memref<?x10xf32>
+  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
+  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
+  // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
+  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
+  // CHECK: [[DIM2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
+  // CHECK: [[DIM3:%.+]] = dim %arg0, 0 : memref<?xf32>
+  // CHECK: [[ONE:%.+]] = constant 1 : index
+  // CHECK: [[IS_ONE:%.+]] = cmpi "eq", [[DIM3]], [[ONE]] : index
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[ZERO:%.+]] = constant 0 : index
+  // CHECK: %[[SELECT1:.+]] = select [[IS_ONE]], [[ZERO]], %arg3 : index
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%[[SELECT1]]] : memref<?xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
+  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: }
+  // CHECK: return [[RES]] : memref<?x10xf32>
+}
			@ -1,291 +1,263 @@
 | 
			
		|||
// RUN: onnf-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
 | 
			
		||||
 | 
			
		||||
func @test_add_add(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Add"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Add"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_add_add
  /// First Add
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Add
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}


func @test_mul_mul(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Mul"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Mul"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_mul_mul
  /// First Mul
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Mul
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

func @test_div_div(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Div"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Div"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_div_div
  /// First Div
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Div
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

func @test_sub_sub(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Sub"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Sub"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_sub_sub
  /// First Sub
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Sub
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

func @test_and_and(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.And"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
  %1 = "onnx.And"(%0, %arg1) : (tensor<*xi32>, tensor<?x10xi32>) -> tensor<*xi32>
func @test_and_and(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.And"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  %1 = "onnx.And"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%1) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_and_and
  /// First And
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Second And
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xi32>
  // CHECK: dealloc [[RES]] : memref<10x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>

  // CHECK: return [[RET_RES]] : memref<?x10xi32>
  // CHECK: return [[RET_RES]] : memref<10x10xi32>
}

func @test_or_or(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
  %1 = "onnx.Or"(%0, %arg1) : (tensor<*xi32>, tensor<?x10xi32>) -> tensor<*xi32>
func @test_or_or(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  %1 = "onnx.Or"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%1) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_or_or
  /// First Or
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Second Or
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xi32>
  // CHECK: dealloc [[RES]] : memref<10x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>

  // CHECK: return [[RET_RES]] : memref<?x10xi32>
  // CHECK: return [[RET_RES]] : memref<10x10xi32>
}

func @test_xor_xor(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
  %1 = "onnx.Xor"(%0, %arg1) : (tensor<*xi32>, tensor<?x10xi32>) -> tensor<*xi32>
func @test_xor_xor(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  %1 = "onnx.Xor"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
  "std.return"(%1) : (tensor<*xi32>) -> ()

  // CHECK-LABEL: test_xor_xor
  /// First Xor
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Second Xor
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
  // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
  // CHECK: store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<?x10xi32>
  // CHECK: store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xi32>
  // CHECK: dealloc [[RES]] : memref<10x10xi32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>

  // CHECK: return [[RET_RES]] : memref<?x10xi32>
  // CHECK: return [[RET_RES]] : memref<10x10xi32>
}

func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@ -572,131 +544,119 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
  // CHECK: return [[RET_RES]] : memref<?x10xf32>
}

func @test_sum_sum(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Sum"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Sum"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_sum_sum
  /// First Sum
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Sum
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

func @test_max_max(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Max"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Max"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_max_max
  /// First Max
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Max
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

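Aside, not part of the diff: the Max lowering checked above builds the maximum from a compare followed by a select, and the Min test that follows is identical apart from the "olt" predicate. A minimal scalar sketch of the same compare-then-select shape:

def elementwise_max(a, b):
    # Mirrors cmpf "ogt" + select in the CHECK lines.
    return a if a > b else b

def elementwise_min(a, b):
    # Same lowering shape with the "olt" predicate.
    return a if a < b else b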
func @test_min_min(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Min"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  %1 = "onnx.Min"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%1) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_min_min
  /// First Min
  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Second Min
  // CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  {
  // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
  // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

  /// Dealloc of first result.
  // CHECK: dealloc [[RES]] : memref<?x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32>
  // CHECK: dealloc [[RES]] : memref<10x10xf32>
  // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>

  // CHECK: return [[RET_RES]] : memref<?x10xf32>
  // CHECK: return [[RET_RES]] : memref<10x10xf32>
}

func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {