commit bbdf4e3b4d
Merge remote-tracking branch 'upstream/master' into shapeinference-pad

Conflicts:
    test/mlir/onnx/onnx_shape_inference.mlir
@@ -442,7 +442,7 @@ void ONNXMatMulOp::inferShapes() {
 lhsShape[0] != rhsShape[rhsRank - 2])
 emitError("Attempt to multiply incompatible matrices.");

-for (int i = 0; i < rhsRank - 2; ++i)
+for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
 dims.emplace_back(rhsShape[i]);
 dims.emplace_back(rhsShape[rhsRank - 1]);
 } else if (lhsShape.size() >= 2 && rhsShape.size() == 1) {
@@ -460,7 +460,7 @@ void ONNXMatMulOp::inferShapes() {
 lhsShape[lhsRank - 1] != rhsShape[0])
 emitError("Attempt to multiply incompatible matrices.");

-for (int i = 0; i < lhsRank - 2; ++i)
+for (decltype(lhsRank) i = 0; i < lhsRank - 2; ++i)
 dims.emplace_back(lhsShape[i]);
 dims.emplace_back(lhsShape[lhsRank - 2]);
 } else if (lhsShape.size() > 2 && rhsShape.size() == 2) {
@@ -474,7 +474,7 @@ void ONNXMatMulOp::inferShapes() {
 lhsShape[lhsRank - 1] != rhsShape[0])
 emitError("Attempt to multiply incompatible matrices.");

-for (int i = 0; i < lhsRank - 1; ++i)
+for (decltype(lhsRank) i = 0; i < lhsRank - 1; ++i)
 dims.emplace_back(lhsShape[i]);
 dims.emplace_back(rhsShape[1]);
 } else if (lhsShape.size() == 2 && rhsShape.size() > 2) {
@@ -488,7 +488,7 @@ void ONNXMatMulOp::inferShapes() {
 lhsShape[1] != rhsShape[rhsRank - 2])
 emitError("Attempt to multiply incompatible matrices.");

-for (int i = 0; i < rhsRank - 2; ++i)
+for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
 dims.emplace_back(rhsShape[i]);
 dims.emplace_back(lhsShape[0]);
 dims.emplace_back(rhsShape[rhsRank - 1]);
@@ -506,10 +506,10 @@ void ONNXMatMulOp::inferShapes() {

 // Check and perform broadcasting for the shapes.
 SmallVector<int64_t, 2> lhsBcastShape;
-for (int i = 0; i < lhsRank - 2; ++i)
+for (decltype(lhsRank) i = 0; i < lhsRank - 2; ++i)
 lhsBcastShape.emplace_back(lhsShape[i]);
 SmallVector<int64_t, 2> rhsBcastShape;
-for (int i = 0; i < rhsRank - 2; ++i)
+for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
 rhsBcastShape.emplace_back(rhsShape[i]);
 if (!getBroadcastedShape(lhsBcastShape, rhsBcastShape, dims))
 emitError("Broadcasted dimensions are incompatible.");
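Note on the loop-index changes above: `lhsRank` and `rhsRank` come from `ArrayRef::size()` and are unsigned, so a plain `int` counter triggers a signed/unsigned comparison warning; `decltype(rank)` keeps the counter in the same unsigned type. A minimal standalone sketch of the pattern (hypothetical `shape` vector, not code from this patch):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> shape = {2, 3, 10, 5}; // hypothetical rhsShape
  auto rank = shape.size();                   // std::size_t, i.e. unsigned

  // for (int i = 0; i < rank - 2; ++i)       // int vs. size_t: -Wsign-compare
  for (decltype(rank) i = 0; i < rank - 2; ++i)
    std::cout << shape[i] << " ";             // prints the batch dims: 2 3
  std::cout << "\n";
  return 0;
}
```

In the patch these loops only run in branches that already require a rank of at least 2, so the unsigned wrap-around of `rank - 2` for rank smaller than 2 does not arise.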
@@ -730,9 +730,9 @@ void ONNXConvNoBiasOp::inferShapes() {
 auto dataShape = dataTy.getShape();
 auto weightShape = weightTy.getShape();

-// Lowest ranked input supported is of shape (N x C x H x W).
-if (dataShape.size() < 4)
-emitError("Data input shape must be at least (NxCxHxW).");
+// Lowest supported convolution is a one dimensional convolution.
+if (dataShape.size() < 3)
+emitError("Data input shape must be at least (NxCxD1).");

 // Check that shape of weight and data have same length.
 if (dataShape.size() != weightShape.size())
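The relaxed rank check above admits 1-D convolutions (N x C x D1) in addition to the 2-D case. For the attribute combinations exercised by the shape-inference tests later in this diff, the spatial output size follows the usual convolution arithmetic; a small self-contained check of those numbers (simplified formula assuming dilation 1, not code from the patch):

```cpp
#include <cassert>

// Simplified output-size formula (dilation 1, explicit pads); mirrors the
// usual convolution arithmetic, not code from the patch.
int convOutDim(int in, int kernel, int padBegin = 0, int padEnd = 0,
               int stride = 1) {
  return (in + padBegin + padEnd - kernel) / stride + 1;
}

int main() {
  assert(convOutDim(32, 6) == 27);          // 1-D: 1x2x32 conv 5x2x6 -> 1x5x27
  assert(convOutDim(64, 7) == 58);          // 2-D: 1x2x32x64 conv 5x2x6x7 -> 1x5x27x58
  assert(convOutDim(32, 6, 2, 3) == 32);    // pads [2, 4, 3, 5] keep the 32x64 size
  assert(convOutDim(64, 10, 4, 5) == 64);
  assert(convOutDim(32, 6, 0, 0, 2) == 14); // strides [2, 3] -> 1x5x14x20
  assert(convOutDim(64, 7, 0, 0, 3) == 20);
  return 0;
}
```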
@@ -284,7 +284,7 @@ getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
 auto dim = rewriter.create<DimOp>(loc, operands[i], j).getResult();
 auto one = rewriter.create<ConstantIndexOp>(loc, 1);
 auto isBroadcasted =
 rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
 broadcastedDims.insert(std::make_pair(j, isBroadcasted));
 }
 }
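For reference, the lines above build a per-dimension broadcast predicate at run time: a dimension takes part in broadcasting exactly when its extent is 1, which is what `CmpIOp(eq, dim, 1)` encodes. A plain C++ analogue of that bookkeeping (illustrative only, using an ordinary map in place of MLIR values):

```cpp
#include <cstdint>
#include <map>
#include <vector>

// Record, per dimension, whether an operand dimension would be broadcast
// (extent 1). This is the same predicate the lowering materializes at run
// time with CmpIOp(eq, dim, 1); illustrative only.
std::map<int64_t, bool> broadcastedDims(const std::vector<int64_t> &shape) {
  std::map<int64_t, bool> dims;
  for (int64_t j = 0; j < static_cast<int64_t>(shape.size()); ++j)
    dims.insert(std::make_pair(j, shape[j] == 1));
  return dims;
}

int main() {
  auto dims = broadcastedDims({1, 5, 1, 10});
  // dims[0] and dims[2] are true; dims[1] and dims[3] are false.
  return (dims[0] && !dims[1] && dims[2] && !dims[3]) ? 0 : 1;
}
```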
@@ -588,9 +588,11 @@ Value mapToLowerScalarOp<ONNXHardSigmoidOp>(
 // Constant 1)
 auto loc = op->getLoc();
 Value operand = operands[0];
-auto alphaAttribute = FloatAttr::get(rewriter.getF32Type(),
+auto alphaAttribute = FloatAttr::get(
+rewriter.getF32Type(),
 llvm::dyn_cast<ONNXHardSigmoidOp>(op).alpha().convertToFloat());
-auto betaAttribute = FloatAttr::get(rewriter.getF32Type(),
+auto betaAttribute = FloatAttr::get(
+rewriter.getF32Type(),
 llvm::dyn_cast<ONNXHardSigmoidOp>(op).beta().convertToFloat());
 auto elementType = result_types[0];

@@ -625,8 +627,9 @@ Value mapToLowerScalarOp<ONNXEluOp>(Operation *op, ArrayRef<Type> result_types,
 Value operand = operands[0];
 auto elementType = result_types[0];

-auto alphaAttribute = FloatAttr::get(rewriter.getF32Type(),
-llvm::dyn_cast<ONNXEluOp>(op).alpha().convertToFloat());
+auto alphaAttribute =
+FloatAttr::get(rewriter.getF32Type(),
+llvm::dyn_cast<ONNXEluOp>(op).alpha().convertToFloat());
 auto zero = rewriter.create<ConstantOp>(loc, FloatAttr::get(elementType, 0));
 auto one = rewriter.create<ConstantOp>(loc, FloatAttr::get(elementType, 1));
 auto alpha = rewriter.create<ConstantOp>(loc, alphaAttribute);
@@ -679,7 +682,8 @@ Value mapToLowerScalarOp<ONNXLeakyReluOp>(Operation *op,
 Value operand = operands[0];
 auto elementType = result_types[0];

-auto alphaAttribute = FloatAttr::get(rewriter.getF32Type(),
+auto alphaAttribute = FloatAttr::get(
+rewriter.getF32Type(),
 llvm::dyn_cast<ONNXLeakyReluOp>(op).alpha().convertToFloat());
 auto zero = rewriter.create<ConstantOp>(loc, FloatAttr::get(elementType, 0));
 auto alpha = rewriter.create<ConstantOp>(loc, alphaAttribute);
@@ -705,10 +709,12 @@ Value mapToLowerScalarOp<ONNXSeluOp>(Operation *op, ArrayRef<Type> result_types,
 // alpha)))
 auto loc = op->getLoc();
 Value operand = operands[0];
-auto alphaAttribute = FloatAttr::get(rewriter.getF32Type(),
-llvm::dyn_cast<ONNXSeluOp>(op).alpha().convertToFloat());
-auto gammaAttribute = FloatAttr::get(rewriter.getF32Type(),
-llvm::dyn_cast<ONNXSeluOp>(op).gamma().convertToFloat());
+auto alphaAttribute =
+FloatAttr::get(rewriter.getF32Type(),
+llvm::dyn_cast<ONNXSeluOp>(op).alpha().convertToFloat());
+auto gammaAttribute =
+FloatAttr::get(rewriter.getF32Type(),
+llvm::dyn_cast<ONNXSeluOp>(op).gamma().convertToFloat());
 auto elementType = result_types[0];

 auto zero = rewriter.create<ConstantOp>(loc, FloatAttr::get(elementType, 0));
@@ -748,9 +754,10 @@ Value mapToLowerScalarOp<ONNXReciprocalOp>(
 // Scalar unary ops for lowering ONNXSoftplusOp
 //===----------------------------------------------------------------------===//
 template <>
-Value mapToLowerScalarOp<ONNXSoftplusOp>(
-Operation *op, ArrayRef<Type> result_types, ArrayRef<Value> operands,
-ConversionPatternRewriter &rewriter) {
+Value mapToLowerScalarOp<ONNXSoftplusOp>(Operation *op,
+ArrayRef<Type> result_types,
+ArrayRef<Value> operands,
+ConversionPatternRewriter &rewriter) {
 // ONNXSoftplusOp(%X) = LogOp(AddFOp(ExpOp(%X), ConstantOp 1))
 auto loc = op->getLoc();
 Value operand = operands[0];
@@ -768,9 +775,10 @@ Value mapToLowerScalarOp<ONNXSoftplusOp>(
 // Scalar unary ops for lowering ONNXSoftsignOp
 //===----------------------------------------------------------------------===//
 template <>
-Value mapToLowerScalarOp<ONNXSoftsignOp>(
-Operation *op, ArrayRef<Type> result_types, ArrayRef<Value> operands,
-ConversionPatternRewriter &rewriter) {
+Value mapToLowerScalarOp<ONNXSoftsignOp>(Operation *op,
+ArrayRef<Type> result_types,
+ArrayRef<Value> operands,
+ConversionPatternRewriter &rewriter) {
 // ONNXSoftsignOp(%X) = DivFOp(ConstantOp 1, %X)
 auto loc = op->getLoc();
 Value operand = operands[0];
@@ -1408,6 +1416,337 @@ struct ONNXReshapeOpLowering : public ConversionPattern {
 }
 };

+struct ONNXMatMulOpLowering : public ConversionPattern {
+ONNXMatMulOpLowering(MLIRContext *ctx)
+: ConversionPattern(mlir::ONNXMatMulOp::getOperationName(), 1, ctx) {}
+
+PatternMatchResult
+matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+ConversionPatternRewriter &rewriter) const final {
+auto tensorType = (*op->result_type_begin()).cast<TensorType>();
+auto loc = op->getLoc();
+
+Value A = operands[0];
+Value B = operands[1];
+auto AShape = A.getType().cast<MemRefType>().getShape();
+auto BShape = B.getType().cast<MemRefType>().getShape();
+
+// There are three cases related to the shapes of the two arguments:
+// - Both arguments are N-D, N >= 2
+// - Either argument is 1-D, the other is N-D, N >= 2
+// - Both arguments are 1-D
+
+// Result type
+auto memRefType = convertTensorToMemRef(tensorType);
+auto elementType = memRefType.getElementType();
+auto memRefShape = memRefType.getShape();
+
+// A value zero
+Value zero;
+if (elementType.isa<IntegerType>()) {
+zero = rewriter.create<ConstantOp>(
+loc, IntegerAttr::get(memRefType.getElementType(), 0));
+} else if (elementType.isa<FloatType>()) {
+zero = rewriter.create<ConstantOp>(
+loc, FloatAttr::get(memRefType.getElementType(), 0));
+} else {
+emitError(loc, "unsupported element type");
+}
+
+// Insert an allocation and deallocation for the result of this operation.
+Value alloc;
+bool insertDealloc = checkInsertDealloc(op);
+if (hasAllConstantDimensions(memRefType))
+alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+else {
+SmallVector<Value, 4> allocOperands;
+if (AShape.size() >= 2 && BShape.size() >= 2) {
+// Both arguments are N-D, N >= 2
+// (s1 x s2 x... x sK x M x K) MATMUL (K x N)
+// =>
+// (s1 x s2 x... x sK x M x N)
+for (int i = 0; i < memRefShape.size() - 2; ++i) {
+if (memRefShape[i] < 0) {
+if ((AShape.size() == 2) && (BShape.size() > 2))
+allocOperands.emplace_back(rewriter.create<DimOp>(loc, B, i));
+else if ((AShape.size() > 2) && (BShape.size() == 2))
+allocOperands.emplace_back(rewriter.create<DimOp>(loc, A, i));
+}
+}
+if (memRefShape[memRefShape.size() - 2] < 0) {
+auto dim = rewriter.create<DimOp>(loc, A, memRefShape.size() - 2);
+allocOperands.emplace_back(dim);
+}
+if (memRefShape[memRefShape.size() - 1] < 0) {
+auto dim = rewriter.create<DimOp>(loc, B, memRefShape.size() - 1);
+allocOperands.emplace_back(dim);
+}
+} else if (AShape.size() == 1 && BShape.size() >= 2) {
+// Either argument is 1-D
+// K MATMUL (s1 x s2 x... x sK x K x N)
+// =>
+// (s1 x s2 x... x sK x N)
+for (int i = 0; i < memRefShape.size() - 1; ++i) {
+if (memRefShape[i] < 0) {
+auto dim = rewriter.create<DimOp>(loc, B, i);
+allocOperands.emplace_back(dim);
+}
+}
+if (memRefShape[memRefShape.size() - 1] < 0) {
+auto dim = rewriter.create<DimOp>(loc, B, BShape.size() - 1);
+allocOperands.emplace_back(dim);
+}
+} else if (AShape.size() >= 2 && BShape.size() == 1) {
+// Either argument is 1-D
+// (s1 x s2 x... x sK x M x K) MATMUL K
+// =>
+// (s1 x s2 x... x sK x M)
+for (int i = 0; i < memRefShape.size() - 1; ++i) {
+if (memRefShape[i] < 0) {
+auto dim = rewriter.create<DimOp>(loc, A, i);
+allocOperands.emplace_back(dim);
+}
+}
+if (memRefShape[memRefShape.size() - 1] < 0) {
+auto dim = rewriter.create<DimOp>(loc, A, AShape.size() - 2);
+allocOperands.emplace_back(dim);
+}
+} else if (AShape.size() == 1 && BShape.size() == 1) {
+// Both arguments are 1-D
+if (memRefShape[0] < 0) {
+auto dim = rewriter.create<DimOp>(loc, A, 0);
+allocOperands.emplace_back(dim);
+}
+} else {
+emitError(loc, "Invalid shapes");
+}
+
+alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
+}
+
+if (AShape.size() >= 2 || BShape.size() >= 2) {
+// Cases 1 and 2:
+// - Both arguments are N-D, N >= 2
+// - Either argument is 1-D, the other is N-D, N >= 2
+
+// Define loops for batch dimensions.
+std::vector<Value> originalLoops;
+std::vector<Value> optimizedLoops;
+Block *optimizationBlock = defineLoops(rewriter, loc, originalLoops,
+optimizedLoops, memRefShape.size());
+
+// Outer KrnlIterateOp
+SmallVector<Value, 4> loopBatchIVs;
+bool hasBatchLoop = false;
+if (AShape.size() > 2 || BShape.size() > 2) {
+SmallVector<int, 4> batchAxes;
+int matmulResultDims =
+((AShape.size() == 1 || BShape.size() == 1)) ? 1 : 2;
+for (int i = 0; i < memRefShape.size() - matmulResultDims; ++i)
+batchAxes.emplace_back(i);
+
+std::vector<Value> outerLoops, optimizedOuterLoops;
+outerLoops.reserve(batchAxes.size());
+optimizedOuterLoops.reserve(batchAxes.size());
+for (int i = 0; i < batchAxes.size(); ++i) {
+outerLoops.push_back(originalLoops[i]);
+optimizedOuterLoops.push_back(optimizedLoops[i]);
+}
+KrnlIterateOperandPack outerPack(rewriter, outerLoops,
+optimizedOuterLoops);
+for (int i = 0; i < batchAxes.size(); ++i) {
+addDimensionToPack(rewriter, loc, outerPack, alloc, i);
+}
+auto outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);
+
+// No optimization
+rewriter.setInsertionPointToEnd(optimizationBlock);
+rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
+
+// Insert instructions into the outer KrnlIterateOp.
+Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
+rewriter.setInsertionPointToStart(&outerIterationBlock);
+
+// Induction variables: non-matrix-multiplication variables.
+for (auto arg : outerIterationBlock.getArguments()) {
+loopBatchIVs.emplace_back(arg);
+}
+
+hasBatchLoop = true;
+}
+
+// Now, we define loops for matrix multiplication.
+
+// Create a KrnlIterateOp for matrix multiplication.
+KrnlIterateOp matmulIterateOp;
+std::vector<Value> matmulLoops, optimizedMatmulLoops;
+if (AShape.size() >= 2 && BShape.size() >= 2) {
+// 2-D x 2-D. Result has two dimensions.
+matmulLoops.reserve(2);
+optimizedMatmulLoops.reserve(2);
+for (int i = 2; i > 0; --i) {
+matmulLoops.emplace_back(originalLoops[memRefShape.size() - i]);
+optimizedMatmulLoops.emplace_back(
+optimizedLoops[memRefShape.size() - i]);
+}
+KrnlIterateOperandPack matmulPack(rewriter, matmulLoops,
+optimizedMatmulLoops);
+for (int i = 2; i > 0; --i) {
+addDimensionToPack(rewriter, loc, matmulPack, alloc,
+memRefShape.size() - i);
+}
+matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, matmulPack);
+} else {
+// 1-D x 2-D, and vice versa. Result has one dimension.
+matmulLoops.reserve(1);
+optimizedMatmulLoops.reserve(1);
+matmulLoops.emplace_back(originalLoops[memRefShape.size() - 1]);
+optimizedMatmulLoops.emplace_back(
+optimizedLoops[memRefShape.size() - 1]);
+KrnlIterateOperandPack matmulPack(rewriter, matmulLoops,
+optimizedMatmulLoops);
+addDimensionToPack(rewriter, loc, matmulPack, alloc,
+memRefShape.size() - 1);
+matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, matmulPack);
+}
+
+if (!hasBatchLoop) {
+// No optimization
+rewriter.setInsertionPointToEnd(optimizationBlock);
+rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
+}
+
+// Insert instructions into the matmul KrnlIterateOp.
+Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front();
+rewriter.setInsertionPointToStart(&matmulIterationBlock);
+
+// Induction variables: M, N
+SmallVector<Value, 4> loopMNIVs;
+for (auto arg : matmulIterationBlock.getArguments()) {
+loopMNIVs.emplace_back(arg);
+}
+// Induction variables for the final result.
+SmallVector<Value, 4> loopBatchMNIVs;
+for (auto arg : loopBatchIVs) {
+loopBatchMNIVs.emplace_back(arg);
+}
+for (auto arg : loopMNIVs) {
+loopBatchMNIVs.emplace_back(arg);
+}
+
+// Fill the output with value 0.
+rewriter.create<StoreOp>(loc, zero, alloc, loopBatchMNIVs);
+
+// Iterate along the reduction dimension.
+// Use a value from A.
+std::vector<Value> reduceLoops;
+std::vector<Value> optimizedReduceLoops;
+Block *optimizationReduceBlock =
+defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
+KrnlIterateOperandPack reducePack(rewriter, reduceLoops,
+optimizedReduceLoops);
+addDimensionToPack(rewriter, loc, reducePack, A, AShape.size() - 1);
+auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);
+
+// No optimization
+rewriter.setInsertionPointToEnd(optimizationReduceBlock);
+rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
+
+// Insert instructions into the reduction KrnlIterateOp.
+Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
+rewriter.setInsertionPointToStart(&reduceIterationBlock);
+
+// Induction variables
+SmallVector<Value, 4> loopKIVs, loopBatchMKIVs, loopBatchKNIVs;
+// K
+loopKIVs.emplace_back(reduceIterationBlock.getArguments()[0]);
+// MK
+if (AShape.size() > 2)
+for (auto arg : loopBatchIVs)
+loopBatchMKIVs.emplace_back(arg);
+if (AShape.size() >= 2)
+loopBatchMKIVs.emplace_back(loopMNIVs[0]);
+loopBatchMKIVs.emplace_back(loopKIVs[0]);
+// KN
+if (BShape.size() > 2)
+for (auto arg : loopBatchIVs)
+loopBatchKNIVs.emplace_back(arg);
+loopBatchKNIVs.emplace_back(loopKIVs[0]);
+if (BShape.size() >= 2)
+if (AShape.size() >= 2)
+loopBatchKNIVs.emplace_back(loopMNIVs[1]);
+else
+loopBatchKNIVs.emplace_back(loopMNIVs[0]);
+
+// Matmul computation
+auto loadedA = rewriter.create<LoadOp>(loc, A, loopBatchMKIVs);
+auto loadedB = rewriter.create<LoadOp>(loc, B, loopBatchKNIVs);
+auto loadedY = rewriter.create<LoadOp>(loc, alloc, loopBatchMNIVs);
+if (elementType.isa<IntegerType>()) {
+auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
+auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
+rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
+} else if (elementType.isa<FloatType>()) {
+auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
+auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
+rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
+}
+} else if ((AShape.size() == 1) && (BShape.size() == 1)) {
+// Case 3:
+// - Both arguments are 1-D
+
+// Fill the output with value 0.
+Value zeroIndex = rewriter.create<ConstantIndexOp>(loc, 0);
+rewriter.create<StoreOp>(loc, zero, alloc, zeroIndex);
+
+// Iterate along the reduction dimension.
+// Use a value from A.
+std::vector<Value> reduceLoops;
+std::vector<Value> optimizedReduceLoops;
+Block *optimizationReduceBlock =
+defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
+KrnlIterateOperandPack reducePack(rewriter, reduceLoops,
+optimizedReduceLoops);
+addDimensionToPack(rewriter, loc, reducePack, A, 0);
+auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);
+
+// No optimization
+rewriter.setInsertionPointToEnd(optimizationReduceBlock);
+rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
+
+// Insert instructions into the reduction KrnlIterateOp.
+Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
+rewriter.setInsertionPointToStart(&reduceIterationBlock);
+
+// Induction variables
+SmallVector<Value, 4> loopKIVs;
+// K
+loopKIVs.emplace_back(reduceIterationBlock.getArgument(0));
+
+// Matmul computation
+auto loadedA = rewriter.create<LoadOp>(loc, A, loopKIVs);
+auto loadedB = rewriter.create<LoadOp>(loc, B, loopKIVs);
+auto loadedY = rewriter.create<LoadOp>(loc, alloc, zeroIndex);
+if (elementType.isa<IntegerType>()) {
+auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
+auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
+rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex);
+} else if (elementType.isa<FloatType>()) {
+auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
+auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
+rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex);
+}
+} else {
+// No scalar matrix multiplication.
+llvm_unreachable("Unsupported scalar matrix multiplication.");
+}
+
+rewriter.replaceOp(op, alloc);
+
+return matchSuccess();
+}
+};
+
 struct ONNXGemmOpLowering : public ConversionPattern {
 ONNXGemmOpLowering(MLIRContext *ctx)
 : ConversionPattern(mlir::ONNXGemmOp::getOperationName(), 1, ctx) {}
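Taken together, the new ONNXMatMulOpLowering above zero-initializes the output and then emits batch loops, (M, N) loops, and an inner K reduction through krnl.iterate. A plain C++ reference of the loop structure generated for the 2-D x 2-D case (simplified sketch; the real pattern drives KrnlIterateOps and also covers the batched and 1-D variants):

```cpp
#include <vector>

// Reference semantics of the emitted loops for Y = A * B with A: MxK,
// B: KxN, Y: MxN (row-major float). Batch dimensions simply wrap further
// loops around this nest, as the outer KrnlIterateOp above does.
void matmul2D(const std::vector<float> &A, const std::vector<float> &B,
              std::vector<float> &Y, int M, int K, int N) {
  for (int m = 0; m < M; ++m)
    for (int n = 0; n < N; ++n) {
      Y[m * N + n] = 0.0f;                    // "Fill the output with value 0."
      for (int k = 0; k < K; ++k)             // reduction KrnlIterateOp
        Y[m * N + n] += A[m * K + k] * B[k * N + n];
    }
}
```

test_matmul1 later in this diff checks exactly this structure at the krnl level.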
@@ -1423,10 +1762,12 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 B = operands[1];
 C = operands[2];

-auto alphaAttr = FloatAttr::get(tensorType.getElementType(),
-llvm::dyn_cast<ONNXGemmOp>(op).alpha().convertToFloat());
-auto betaAttr = FloatAttr::get(tensorType.getElementType(),
-llvm::dyn_cast<ONNXGemmOp>(op).beta().convertToFloat());
+auto alphaAttr =
+FloatAttr::get(tensorType.getElementType(),
+llvm::dyn_cast<ONNXGemmOp>(op).alpha().convertToFloat());
+auto betaAttr =
+FloatAttr::get(tensorType.getElementType(),
+llvm::dyn_cast<ONNXGemmOp>(op).beta().convertToFloat());
 auto alpha = rewriter.create<ConstantOp>(loc, alphaAttr);
 auto beta = rewriter.create<ConstantOp>(loc, betaAttr);

@@ -1482,8 +1823,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 outerLoops.push_back(originalLoops[i]);
 optimizedOuterLoops.push_back(optimizedLoops[i]);
 }
-KrnlIterateOperandPack outerPack(rewriter, outerLoops,
-optimizedOuterLoops);
+KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
 // Induction variables for the outer loops
 for (int i = 0; i < 2; ++i)
 addDimensionToPack(rewriter, loc, outerPack, alloc, i);
@@ -1501,17 +1841,16 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 // If it failed then use a dynamic value.
 auto ATy = A.getType().cast<MemRefType>();
 auto BTy = B.getType().cast<MemRefType>();
-int64_t K_A_Idx = (isTransA) ? 0 : 1;
-int64_t K_B_Idx = (isTransB) ? 1 : 0;
+int K_A_Idx = (isTransA) ? 0 : 1;
+int K_B_Idx = (isTransB) ? 1 : 0;
 reductionPack.pushConstantBound(0);
 if (ATy.getShape()[K_A_Idx] != -1)
 reductionPack.pushConstantBound(ATy.getShape()[K_A_Idx]);
+else if (BTy.getShape()[K_B_Idx] != -1)
+reductionPack.pushConstantBound(BTy.getShape()[K_B_Idx]);
 else
-if (BTy.getShape()[K_B_Idx] != -1)
-reductionPack.pushConstantBound(BTy.getShape()[K_B_Idx]);
-else
-reductionPack.pushOperandBound(
-rewriter.create<DimOp>(loc, B, K_B_Idx).getResult());
+reductionPack.pushOperandBound(
+rewriter.create<DimOp>(loc, B, K_B_Idx).getResult());

 // Get run-time dimension information for unknown dimensions used for
 // broadcasting.
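The reworked reduction-bound selection above now also uses B's static dimension before falling back to a runtime DimOp on B. The decision order, written out as a small sketch (with -1 standing for a dynamic dimension, as in MemRefType::getShape; not code from the patch):

```cpp
#include <cstdint>

// K is the reduction bound of GEMM. Return a constant bound when either
// operand's shape provides it statically, or -1 when the lowering must emit
// a runtime DimOp on B. Mirrors the if / else-if / else chain above;
// illustrative only.
int64_t pickReductionBound(int64_t aK, int64_t bK) {
  if (aK != -1)
    return aK;   // static K taken from A
  if (bK != -1)
    return bK;   // static K taken from B (the newly handled case)
  return -1;     // both dynamic: fall back to DimOp(B, K_B_Idx)
}
```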
@@ -1524,7 +1863,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 auto dim = rewriter.create<DimOp>(loc, C, i).getResult();
 auto one = rewriter.create<ConstantIndexOp>(loc, 1);
 auto isBroadcasted =
 rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
 broadcastedDimInfo.insert(std::make_pair(i, isBroadcasted));
 }
 }
@@ -1557,8 +1896,8 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack);

 // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
-auto loopCIVs = getLoopIVsForBroadcasting(
-loc, rewriter, loopMNIVs, C, broadcastedDimInfo);
+auto loopCIVs = getLoopIVsForBroadcasting(loc, rewriter, loopMNIVs, C,
+broadcastedDimInfo);
 auto loadedC = rewriter.create<LoadOp>(loc, C, loopCIVs);
 auto loadedAB = rewriter.create<LoadOp>(loc, alloc, loopMNIVs);
 auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
@@ -1650,8 +1989,8 @@ struct ONNXUnsqueezeOpLowering : public ConversionPattern {
 Value dimVal = nullptr;
 if (memRefShape[outIdx] < 0) {
 Value index = rewriter.create<DimOp>(loc, operands[0], inIdx);
-dimVal = rewriter.create<IndexCastOp>(
-loc, index, rewriter.getIntegerType(64));
+dimVal = rewriter.create<IndexCastOp>(loc, index,
+rewriter.getIntegerType(64));
 allocOperands.emplace_back(index);
 } else {
 dimVal = rewriter.create<ConstantOp>(
@@ -1739,7 +2078,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
 // the default case). This means that perm was added by shape
 // inference or another pass to contain the values corresponding
 // to the default behavior of Transpose.
-for (int i = iterationBlock.getArguments().size()-1; i >= 0; i--)
+for (int i = iterationBlock.getArguments().size() - 1; i >= 0; i--)
 perm.emplace_back(i);
 }

@@ -1748,7 +2087,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
 inLoopIVs.emplace_back(arg);

 SmallVector<Value, 4> outLoopIVs;
-for (int i=0; i<iterationBlock.getArguments().size(); ++i)
+for (int i = 0; i < iterationBlock.getArguments().size(); ++i)
 outLoopIVs.emplace_back(iterationBlock.getArguments()[perm[i]]);

 auto inVal = rewriter.create<LoadOp>(loc, operands[0], inLoopIVs);
@@ -2362,8 +2701,8 @@ void FrontendToKrnlLoweringPass::runOnModule() {
 ONNXReductionOpLowering<mlir::ONNXReduceSumOp>,
 ONNXSoftmaxOpLowering, ONNXGemmOpLowering,
 ONNXUnsqueezeOpLowering, ONNXTransposeOpLowering,
-ONNXIdentityOpLowering, ONNXConvNoBiasOpLowering
->(&getContext());
+ONNXIdentityOpLowering, ONNXConvNoBiasOpLowering,
+ONNXMatMulOpLowering>(&getContext());

 // With the target and rewrite patterns defined, we can now attempt the
 // conversion. The conversion will signal failure if any of our `illegal`
@@ -295,6 +295,12 @@ test_to_enable = [

 # Sign Op:
 "test_sign_cpu",
+
+# MatmulOp
+"test_matmul_2d_cpu",
+"test_matmul_3d_cpu",
+"test_matmul_4d_cpu",
+
 ]

 # Extract name of all test cases.
@@ -930,6 +930,223 @@ func @test_sign_i(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
 // CHECK: return [[RES]] : memref<?x10xi32>
 }

+// 2-D x 2-D
+func @test_matmul1(%arg0 : tensor<10x5xf32>, %arg1 : tensor<5x10xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<10x5xf32>, tensor<5x10xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul1
+// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
+// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
+// CHECK: } : () -> (!krnl.loop, !krnl.loop)
+// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 10, [[LOOPS]]#1 -> %arg3 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg4] : memref<10x5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg4, %arg3] : memref<5x10xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<10x10xf32>
+}
+
+// 2-D x N-D
+func @test_matmul2(%arg0 : tensor<10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<10x5xf32>, tensor<2x3x5x10xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul2
+// CHECK: [[RES:%.+]] = alloc() : memref<2x3x10x10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
+// CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
+// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
+// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
+// CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg4, %arg6] : memref<10x5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<2x3x10x10xf32>
+}
+
+// N-D x N-D
+func @test_matmul3(%arg0 : tensor<2x3x10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<2x3x10x5xf32>, tensor<2x3x5x10xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul3
+// CHECK: [[RES:%.+]] = alloc() : memref<2x3x10x10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
+// CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
+// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
+// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
+// CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg3, %arg4, %arg6] : memref<2x3x10x5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<2x3x10x10xf32>
+}
+
+// 1-D x 2-D
+func @test_matmul4(%arg0 : tensor<5xf32>, %arg1 : tensor<5x10xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<5xf32>, tensor<5x10xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul4
+// CHECK: [[RES:%.+]] = alloc() : memref<10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[LOOPS:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS]]) with ([[LOOPS]] -> %arg2 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2] : memref<10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg3 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg3] : memref<5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg3, %arg2] : memref<5x10xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2] : memref<10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2] : memref<10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<10xf32>
+}
+
+// 1-D x N-D
+func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor<?x5x10xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<5xf32>, tensor<?x5x10xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul5
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[DIM_0:%.+]] = dim %arg1, 0 : memref<?x5x10xf32>
+// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
+// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
+// CHECK: } : () -> (!krnl.loop, !krnl.loop)
+// CHECK: [[DIM_1:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
+// CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
+// CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg4] : memref<5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg4, %arg3] : memref<?x5x10xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<?x10xf32>
+}
+
+// N-D x 1-D
+func @test_matmul6(%arg0 : tensor<?x10x5xf32>, %arg1 : tensor<5xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<?x10x5xf32>, tensor<5xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul6
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10x5xf32>
+// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
+// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
+// CHECK: } : () -> (!krnl.loop, !krnl.loop)
+// CHECK: [[DIM_1:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
+// CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
+// CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
+// CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg3, %arg4] : memref<?x10x5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg4] : memref<5xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return [[RES]] : memref<?x10xf32>
+}
+
+// 1-D x 1-D
+func @test_matmul7(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<*xf32> {
+%0 ="onnx.MatMul"(%arg0, %arg1) : (tensor<5xf32>, tensor<5xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_matmul7
+// CHECK: [[RES:%.+]] = alloc() : memref<1xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
+// CHECK: %[[CONSTANT_INDEX:.+]] = constant 0 : index
+// CHECK: store [[CONSTANT]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+// CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+// CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
+// CHECK: krnl.return_loops [[LOOPS_REDUCE]]
+// CHECK: } : () -> !krnl.loop
+// CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg2 = 0 to 5) {
+// CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2] : memref<5xf32>
+// CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2] : memref<5xf32>
+// CHECK: [[LOAD_RES:%.+]] = load [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+// CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
+// CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
+// CHECK: store [[ADD]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+// CHECK: }
+// CHECK: return [[RES]] : memref<1xf32>
+}
+
 func @test_conv_no_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
 %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
 "std.return"(%0) : (tensor<*xf32>) -> ()
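The seven lowering tests added above cover the MatMul shape cases end to end: 2-D x 2-D, 2-D x N-D, N-D x N-D, 1-D against N-D in both orders, and 1-D x 1-D. A hand-written summary of the result shapes those tests expect, checked with a small helper that follows the numpy-style matmul rules for these particular shapes (full two-sided batch broadcasting is not modeled):

```cpp
#include <cassert>
#include <vector>

using Shape = std::vector<long>;

// Result shape of ONNX MatMul for the shapes used in test_matmul1..7.
// -1 means a dynamic dimension; batch dims are taken from the higher-rank
// input, which is all these tests need.
Shape matmulResultShape(const Shape &a, const Shape &b) {
  if (a.size() == 1 && b.size() == 1)
    return {1};                                  // dot product -> memref<1xf32>
  if (a.size() == 1) {                           // K x (... x K x N) -> (... x N)
    Shape r(b.begin(), b.end() - 2);
    r.push_back(b.back());
    return r;
  }
  if (b.size() == 1)                             // (... x M x K) x K -> (... x M)
    return Shape(a.begin(), a.end() - 1);
  const Shape &batch = (a.size() >= b.size()) ? a : b;
  Shape r(batch.begin(), batch.end() - 2);       // batch dims
  r.push_back(a[a.size() - 2]);                  // M
  r.push_back(b.back());                         // N
  return r;
}

int main() {
  assert(matmulResultShape({10, 5}, {5, 10}) == (Shape{10, 10}));                   // test_matmul1
  assert(matmulResultShape({10, 5}, {2, 3, 5, 10}) == (Shape{2, 3, 10, 10}));       // test_matmul2
  assert(matmulResultShape({2, 3, 10, 5}, {2, 3, 5, 10}) == (Shape{2, 3, 10, 10})); // test_matmul3
  assert(matmulResultShape({5}, {5, 10}) == (Shape{10}));                           // test_matmul4
  assert(matmulResultShape({5}, {-1, 5, 10}) == (Shape{-1, 10}));                   // test_matmul5
  assert(matmulResultShape({-1, 10, 5}, {5}) == (Shape{-1, 10}));                   // test_matmul6
  assert(matmulResultShape({5}, {5}) == (Shape{1}));                                // test_matmul7
  return 0;
}
```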
@@ -143,27 +143,38 @@ func @test_matmul_10(%arg0 : tensor<?x42x32xf32>, %arg1 : tensor<32xf32>) -> ten
 /// Test shape inference for ConvNoBias operation and all its attributes.
 //===----------------------------------------------------------------------===//

+/// Default and required attributes for 1-D convolution.
+
+func @test_conv_no_bias_0(%arg0 : tensor<1x2x32xf32>, %arg1 : tensor<5x2x6xf32>) -> tensor<*xf32> {
+%0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64} : (tensor<1x2x32xf32>, tensor<5x2x6xf32>) -> tensor<*xf32>
+"std.return"(%0) : (tensor<*xf32>) -> ()
+
+// CHECK-LABEL: test_conv_no_bias_0
+// CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64} : (tensor<1x2x32xf32>, tensor<5x2x6xf32>) -> tensor<1x5x27xf32>
+// CHECK: return [[RES_ATTR]] : tensor<1x5x27xf32>
+}
+
 /// Default and required attributes.

 func @test_conv_no_bias_1(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
 %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
 "std.return"(%0) : (tensor<*xf32>) -> ()
-}

 // CHECK-LABEL: test_conv_no_bias_1
 // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x27x58xf32>
 // CHECK: return [[RES_ATTR]] : tensor<1x5x27x58xf32>
+}

 /// kernel_shape attribute.

 func @test_conv_no_bias_2(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
 %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, kernel_shape = [8, 9]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
 "std.return"(%0) : (tensor<*xf32>) -> ()
-}

 // CHECK-LABEL: test_conv_no_bias_2
 // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, kernel_shape = [8, 9]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x25x56xf32>
 // CHECK: return [[RES_ATTR]] : tensor<1x5x25x56xf32>
+}

 /// pads attribute.
 /// Use pads to make output size equal to input size by adding K - 1 to the result.
@ -171,53 +182,53 @@ func @test_conv_no_bias_2(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7x
|
||||||
func @test_conv_no_bias_3(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10xf32>) -> tensor<*xf32> {
|
func @test_conv_no_bias_3(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10xf32>) -> tensor<*xf32> {
|
||||||
%0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [2, 4, 3, 5]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<*xf32>
|
%0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [2, 4, 3, 5]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<*xf32>
|
||||||
"std.return"(%0) : (tensor<*xf32>) -> ()
|
"std.return"(%0) : (tensor<*xf32>) -> ()
|
||||||
}
|
|
||||||
|
|
||||||
// CHECK-LABEL: test_conv_no_bias_3
|
// CHECK-LABEL: test_conv_no_bias_3
|
||||||
// CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [2, 4, 3, 5]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
|
// CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [2, 4, 3, 5]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
|
||||||
// CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
|
// CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
|
||||||
|
}
|
||||||
|
|
||||||
/// auto_pad set to SAME_UPPER and SAME_LOWER.
|
/// auto_pad set to SAME_UPPER and SAME_LOWER.
|
||||||
|
|
||||||
func @test_conv_no_bias_4(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_4
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
}

func @test_conv_no_bias_5(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_LOWER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_5
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_LOWER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
}
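
// Shape note (annotation, assuming standard ONNX semantics): with the default stride of 1,
// both SAME_UPPER and SAME_LOWER pad so that the output spatial dims equal the input ones,
// i.e. 32x64; the two modes differ only in whether the extra padding goes at the end or the
// beginning of each axis.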

/// auto_pad set to VALID.

func @test_conv_no_bias_6(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "VALID", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_6
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "VALID", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x27x55xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x27x55xf32>
}
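
// Shape note (annotation, assuming standard ONNX semantics): VALID means no padding, so the
// output spatial dims are 32 - 6 + 1 = 27 and 64 - 10 + 1 = 55, matching tensor<1x5x27x55xf32>.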

/// With strides attribute.

func @test_conv_no_bias_7(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_7
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x14x20xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x14x20xf32>
}
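
// Shape note (annotation, assuming standard ONNX semantics): with no padding and strides [2, 3],
// the output dims are floor((32 - 6) / 2) + 1 = 14 and floor((64 - 7) / 3) + 1 = 20,
// matching tensor<1x5x14x20xf32>.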

/// auto_pad set to SAME_UPPER with strides attribute.
/// The auto_pad will pad as if the stride were equal to 1.

@@ -225,33 +236,33 @@ func @test_conv_no_bias_7(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7x
func @test_conv_no_bias_8(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_8
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x16x22xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x16x22xf32>
}
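
The expected result types in the stride and auto_pad tests above follow from the standard ONNX output-size arithmetic. The snippet below is a minimal C++ sketch of that arithmetic only, assuming dilation fixed at 1; convOutputDim is a hypothetical helper written for illustration and is not code taken from ONNXConvNoBiasOp::inferShapes.

#include <cstdint>
#include <iostream>
#include <string>

// One spatial axis of an ONNX Conv output, assuming dilation == 1.
int64_t convOutputDim(int64_t in, int64_t kernel, int64_t stride,
                      int64_t padBegin, int64_t padEnd,
                      const std::string &autoPad) {
  if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER")
    return (in + stride - 1) / stride;                    // ceil(in / stride)
  if (autoPad == "VALID")
    return (in - kernel) / stride + 1;                    // no padding
  return (in + padBegin + padEnd - kernel) / stride + 1;  // NOTSET with explicit pads
}

int main() {
  // test_conv_no_bias_7: 32x64 input, 6x7 kernel, strides [2, 3], no padding.
  std::cout << convOutputDim(32, 6, 2, 0, 0, "NOTSET") << "x"
            << convOutputDim(64, 7, 3, 0, 0, "NOTSET") << "\n";      // prints 14x20
  // test_conv_no_bias_8: same shapes with auto_pad = SAME_UPPER.
  std::cout << convOutputDim(32, 6, 2, 0, 0, "SAME_UPPER") << "x"
            << convOutputDim(64, 7, 3, 0, 0, "SAME_UPPER") << "\n";  // prints 16x22
  return 0;
}

For SAME_* padding computed as if the stride were 1 (total padding = kernel - 1 per axis), floor((in - 1) / stride) + 1 equals ceil(in / stride), so either reading of the comment above yields the same 1x5x16x22xf32 result here.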

/// dilations attribute.

func @test_conv_no_bias_9(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, dilations = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_9
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", dilations = [2, 3], group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x20x42xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x20x42xf32>
}

/// dilations attribute with stride.

func @test_conv_no_bias_10(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
  %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, dilations = [2, 3], strides = [2, 2]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_conv_no_bias_10
  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", dilations = [2, 3], group = 1 : i64, strides = [2, 2]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x10x21xf32>
  // CHECK: return [[RES_ATTR]] : tensor<1x5x10x21xf32>
}

/// dilations attribute with auto_pad set to SAME_UPPER.

@@ -259,10 +270,9 @@ func @test_conv_no_bias_11(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7
%0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, dilations = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
|
%0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, dilations = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
|
||||||
"std.return"(%0) : (tensor<*xf32>) -> ()
|
"std.return"(%0) : (tensor<*xf32>) -> ()
|
||||||
}
|
}
|
||||||
|
// CHECK-LABEL: test_conv_no_bias_11
|
||||||
// CHECK-LABEL: test_conv_no_bias_11
|
// CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", dilations = [2, 3], group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x32x64xf32>
|
||||||
// CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", dilations = [2, 3], group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x32x64xf32>
|
// CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
|
||||||
// CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
|
|
||||||
|
|
||||||
|
|
||||||
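
// Shape note (annotation, assuming standard ONNX semantics): with the default stride of 1,
// SAME_UPPER keeps the output spatial dims equal to the input ones, so the result stays
// 32x64 even with dilations = [2, 3].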

/// Test PadConstantValuePad_1