From f16e79d744cc5db8b8b94aeee6222e20442fae30 Mon Sep 17 00:00:00 2001 From: Gheorghe-Teodor Bercea Date: Wed, 1 Apr 2020 13:51:06 -0400 Subject: [PATCH] Emit constant tensors as global constants (#66) * Reorganize main function. * Follow review comments. * Emit constants as globals in Krnl and LLVM dialects. * Enable unique constant variable names. * Emit alloca for local array. Add tests. * Comment clean-up. * Simplify MemRef construction. * Fix output type. --- .../ONNXToKrnl/ConvertONNXToKrnl.cpp | 2 +- .../ONNXToKrnl/ONNXToKrnlCommon.cpp | 4 + .../ONNXToKrnl/ONNXToKrnlCommon.hpp | 2 + src/Conversion/ONNXToKrnl/Tensor/Constant.cpp | 81 +++----- src/Dialect/Krnl/KrnlOps.td | 13 ++ src/Transform/LowerKrnl.cpp | 1 + src/Transform/LowerToLLVM.cpp | 175 +++++++++++++++--- test/mlir/krnl/constant.mlir | 53 ++++++ test/mlir/onnx/onnx_lowering.mlir | 17 +- 9 files changed, 245 insertions(+), 103 deletions(-) create mode 100644 test/mlir/krnl/constant.mlir diff --git a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp index 0549587..662fc80 100644 --- a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp +++ b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp @@ -47,7 +47,7 @@ struct FrontendToKrnlLoweringPass } // end anonymous namespace. void FrontendToKrnlLoweringPass::runOnModule() { - auto module = getModule(); + ModuleOp module = getModule(); // The first thing to define is the conversion target. This will define the // final target for this lowering. 
diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp index 9eadcac..d79490b 100644 --- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp +++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp @@ -485,3 +485,7 @@ Value emitNegativeInfinityConstantOp( return rewriter.create(loc, constantAttr); } + +int64_t ArrayAttrIntVal(ArrayAttr a, int i) { + return (a.getValue()[i]).cast().getInt(); +} \ No newline at end of file diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp index 7403cc4..f725b8d 100644 --- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp +++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp @@ -117,6 +117,8 @@ Value emitPositiveInfinityConstantOp( Value emitNegativeInfinityConstantOp( ConversionPatternRewriter &rewriter, Location loc, Type type); +int64_t ArrayAttrIntVal(ArrayAttr a, int i); + //===----------------------------------------------------------------------===// // This is to get a scalar operation of a given type for a specific operation. //===----------------------------------------------------------------------===// diff --git a/src/Conversion/ONNXToKrnl/Tensor/Constant.cpp b/src/Conversion/ONNXToKrnl/Tensor/Constant.cpp index 7289354..8389047 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Constant.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Constant.cpp @@ -12,40 +12,13 @@ using namespace mlir; -template -void emitConstantAndStoreOpForDenseElementsAttr( - ConversionPatternRewriter &rewriter, Location loc, - DenseElementsAttr constantValue, ArrayRef valueShape, - ArrayRef constantIndices, Value alloc) { - // The following functor recursively walks the dimensions of the constant - // shape, generating a store when the recursion hits the base case. 
- SmallVector indices; - auto valueIt = constantValue.getValues().begin(); - std::function storeElements = [&](uint64_t dimension) { - // The last dimension is the base case of the recursion, at this point - // we store the element at the given index. - if (dimension == valueShape.size()) { - rewriter.create(loc, - rewriter.create(loc, *valueIt++), alloc, - llvm::makeArrayRef(indices)); - return; - } - - // Otherwise, iterate over the current dimension and add the indices to - // the list. - for (uint64_t i = 0, e = valueShape[dimension]; i != e; ++i) { - indices.push_back(constantIndices[i]); - storeElements(dimension + 1); - indices.pop_back(); - } - }; - // Start the element storing recursion from the first dimension. - storeElements(/*dimension=*/0); -} - struct ONNXConstantOpLowering : public ConversionPattern { + static int constantID; + ONNXConstantOpLowering(MLIRContext *ctx) - : ConversionPattern(mlir::ONNXConstantOp::getOperationName(), 1, ctx) {} + : ConversionPattern(mlir::ONNXConstantOp::getOperationName(), 1, ctx) { + constantID = 0; + } LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { @@ -58,42 +31,32 @@ struct ONNXConstantOpLowering : public ConversionPattern { auto memRefType = convertToMemRefType(*op->result_type_begin()); - Value alloc; - bool insertDealloc = checkInsertDealloc(op); + // Shape based computations. 
+ auto shape = memRefType.getShape(); + int64_t numElements = 1; + for (int i=0; i(loc, + memRefType, + rewriter.getI64ArrayAttr(shape), + constantOp.value().getValue(), + rewriter.getStringAttr("constant_" + std::to_string(constantID))); - DenseElementsAttr constantValue = - constantOp.value().getValue().cast(); - - auto valueShape = memRefType.getShape(); - SmallVector constantIndices; - for (auto i : llvm::seq( - 0, *std::max_element(valueShape.begin(), valueShape.end()))) - constantIndices.push_back(rewriter.create(loc, i)); - - // The constant operation represents a multi-dimensional constant, so we - // will need to generate a store for each of the elements. - if (memRefType.getElementType().isa()) { - emitConstantAndStoreOpForDenseElementsAttr( - rewriter, loc, constantValue, valueShape, constantIndices, alloc); - } else if (memRefType.getElementType().isa()) { - emitConstantAndStoreOpForDenseElementsAttr( - rewriter, loc, constantValue, valueShape, constantIndices, alloc); - } else { - emitError(loc, "Unsupported output type"); - } + // Increment constant ID: + constantID++; // Replace this operation with the generated alloc. - rewriter.replaceOp(op, alloc); + // rewriter.replaceOp(op, alloc); + rewriter.replaceOp(op, constantGlobal.getResult()); return success(); } }; +int ONNXConstantOpLowering::constantID; + void populateLoweringONNXConstantOpPattern( OwningRewritePatternList &patterns, MLIRContext *ctx) { patterns.insert(ctx); diff --git a/src/Dialect/Krnl/KrnlOps.td b/src/Dialect/Krnl/KrnlOps.td index e4e73de..0e85886 100644 --- a/src/Dialect/Krnl/KrnlOps.td +++ b/src/Dialect/Krnl/KrnlOps.td @@ -192,3 +192,16 @@ def KrnlMemcpyOp : Op { let parser = ?; let printer = ?; } + +def KrnlGlobalOp : Op { + let summary = "Krnl global operation"; + let description = [{ + Operation for holding global data values. 
+ }]; + + let arguments = (ins AnyAttr:$shape, AnyAttr:$value, StrAttr:$name); + let results = (outs AnyTypeOf<[AnyMemRef]>:$output); + + let parser = ?; + let printer = ?; +} diff --git a/src/Transform/LowerKrnl.cpp b/src/Transform/LowerKrnl.cpp index 7f24bf4..9aca349 100644 --- a/src/Transform/LowerKrnl.cpp +++ b/src/Transform/LowerKrnl.cpp @@ -154,6 +154,7 @@ void KrnlToAffineLoweringPass::runOnFunction() { target.addIllegalDialect(); target.addLegalOp(); target.addLegalOp(); + target.addLegalOp(); OwningRewritePatternList patterns; patterns.insert("llvm.memcpy.p0i8.p0i8.i64")) + return SymbolRefAttr::get("llvm.memcpy.p0i8.p0i8.i64", context); + // Create a function declaration for memcpy, the signature is: + // * `void (i8*, i8* , i64, i1)` + auto llvmVoidTy = LLVM::LLVMType::getVoidTy(llvmDialect); + auto llvmI8PtrTy = LLVM::LLVMType::getInt8PtrTy(llvmDialect); + auto llvmI64Ty = LLVM::LLVMType::getInt64Ty(llvmDialect); + auto llvmI1Ty = LLVM::LLVMType::getInt1Ty(llvmDialect); + auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmVoidTy, + ArrayRef( + {llvmI8PtrTy, llvmI8PtrTy, llvmI64Ty, llvmI1Ty}), + false); + + // Insert the memcpy function into the body of the parent module. 
+ PatternRewriter::InsertionGuard insertGuard(rewriter); + rewriter.setInsertionPointToStart(module.getBody()); + rewriter.create( + module.getLoc(), "llvm.memcpy.p0i8.p0i8.i64", llvmFnType); + return SymbolRefAttr::get("llvm.memcpy.p0i8.p0i8.i64", context); +} + +//===----------------------------------------------------------------------===// +// KRNL to LLVM: KrnlGlobalOpLowering +//===----------------------------------------------------------------------===// + +class KrnlGlobalOpLowering : public ConvertToLLVMPattern { +public: + explicit KrnlGlobalOpLowering(MLIRContext *context, + LLVMTypeConverter &lowering_) + : ConvertToLLVMPattern(KrnlGlobalOp::getOperationName(), context, + lowering_) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto *context = op->getContext(); + auto loc = op->getLoc(); + auto *llvmDialect = + op->getContext()->getRegisteredDialect(); + assert(llvmDialect && "expected llvm dialect to be registered"); + + auto krnlGlobalOp = llvm::dyn_cast(op); + + // Get module. + ModuleOp module = op->getParentOfType(); + + // Global name. + auto name = krnlGlobalOp.name(); + + // Compute total number of elements. + auto shape = (krnlGlobalOp.shape()).dyn_cast(); + int64_t numElements = 1; + for (int i=0; igetResult(0).getType(); + auto memRefTy = type.cast(); + auto llvmMemRefType = + typeConverter.convertType(type).cast(); + + // The element type of the array. 
+ auto constantElementType = + typeConverter.convertType(memRefTy.getElementType()); + auto globalType = constantElementType; + for (int i=shape.size() - 1; i >= 0; i--) + globalType = LLVM::LLVMType::getArrayTy( + globalType.cast(), ArrayAttrIntVal(shape, i)); + // The llvm type of the global (example: [2 x [8 x float]]) + auto llvmGlobalType = globalType.cast(); + + { + OpBuilder::InsertionGuard insertGuard(rewriter); + rewriter.setInsertionPointToStart(module.getBody()); + + global = rewriter.create(loc, + llvmGlobalType, /*isConstant=*/true, + LLVM::Linkage::Internal, name, krnlGlobalOp.value()); + } + + // Some frequently used types. + auto llvmI8PtrTy = LLVM::LLVMType::getInt8PtrTy(llvmDialect); + auto llvmI64Ty = LLVM::LLVMType::getInt64Ty(llvmDialect); + + // Allocate the memory where the constants will be used from. + // This is a region of local memory and needs to be emitted as an alloca. + auto one = rewriter.create(loc, + llvmI64Ty, rewriter.getI64IntegerAttr(1)); + auto alloc = rewriter.create( + loc, llvmGlobalType.getPointerTo(), one, /*alignment=*/0); + + // Copy constant value into the local alloca: + // - Bitcast alloc to i8* + Value int8PtrAlloc = rewriter.create( + loc, llvmI8PtrTy, alloc); + // - Bitcast global to i8* + Value globalValue = rewriter.create(loc, global); + Value i8PtrGlobal = rewriter.create( + loc, llvmI8PtrTy, globalValue); + // - Set size. + Value memRefElementSize = rewriter.create(loc, + llvmI64Ty, rewriter.getI64IntegerAttr( + getMemRefEltSizeInBytes(memRefTy))); + Value numElementsValue = rewriter.create( + loc, llvmI64Ty, rewriter.getI64IntegerAttr(numElements)); + Value totalElementsSize = rewriter.create( + loc, memRefElementSize, numElementsValue); + Value int64Size = rewriter.create( + loc, llvmI64Ty, totalElementsSize); + // - Set volatile. 
+ Value isVolatile = rewriter.create( + loc, LLVM::LLVMType::getInt1Ty(llvmDialect), + rewriter.getIntegerAttr(rewriter.getIntegerType(1), 0)); + // - Copy constant data into the alloca. + auto memcpyRef = getOrInsertMemcpy(rewriter, module, llvmDialect); + rewriter.create( + loc, memcpyRef, LLVM::LLVMType::getVoidTy(llvmDialect), + ArrayRef({int8PtrAlloc, i8PtrGlobal, int64Size, isVolatile})); + + // Prepare data to be inserted into MemRef. + auto llvmConstantElementType = constantElementType.cast(); + Value typedAlloc = rewriter.create( + loc, llvmConstantElementType.getPointerTo(), alloc); + + // Create llvm MemRef from original MemRef and fill the data pointers. + auto llvmMemRef = MemRefDescriptor::fromStaticShape( + rewriter, loc, typeConverter, memRefTy, typedAlloc); + + rewriter.replaceOp(op, {llvmMemRef}); + return success(); + } + +private: + static int64_t ArrayAttrIntVal(ArrayAttr a, int i) { + return (a.getValue()[i]).cast().getInt(); + } +}; + //===----------------------------------------------------------------------===// // KRNL to LLVM: KrnlMemcpyOpLowering //===----------------------------------------------------------------------===// @@ -120,33 +266,6 @@ public: rewriter.eraseOp(op); return success(); } - -private: - /// Return a symbol reference to the memcpy function, inserting it into the - /// module if necessary. 
- static FlatSymbolRefAttr getOrInsertMemcpy(PatternRewriter &rewriter, - ModuleOp module, LLVM::LLVMDialect *llvmDialect) { - auto *context = module.getContext(); - if (module.lookupSymbol("llvm.memcpy.p0i8.p0i8.i64")) - return SymbolRefAttr::get("llvm.memcpy.p0i8.p0i8.i64", context); - // Create a function declaration for memcpy, the signature is: - // * `void (i8*, i8* , i64, i1)` - auto llvmVoidTy = LLVM::LLVMType::getVoidTy(llvmDialect); - auto llvmI8PtrTy = LLVM::LLVMType::getInt8PtrTy(llvmDialect); - auto llvmI64Ty = LLVM::LLVMType::getInt64Ty(llvmDialect); - auto llvmI1Ty = LLVM::LLVMType::getInt1Ty(llvmDialect); - auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmVoidTy, - ArrayRef( - {llvmI8PtrTy, llvmI8PtrTy, llvmI64Ty, llvmI1Ty}), - false); - - // Insert the memcpy function into the body of the parent module. - PatternRewriter::InsertionGuard insertGuard(rewriter); - rewriter.setInsertionPointToStart(module.getBody()); - rewriter.create( - module.getLoc(), "llvm.memcpy.p0i8.p0i8.i64", llvmFnType); - return SymbolRefAttr::get("llvm.memcpy.p0i8.p0i8.i64", context); - } }; //===----------------------------------------------------------------------===// @@ -506,6 +625,8 @@ void KrnlToLLVMLoweringPass::runOnModule() { /*useAlloca=*/false, /*emitCWrapper=*/true); + patterns.insert(&getContext(), typeConverter); + // Lower from the `krnl` dialect i.e. the Reshape operation. 
patterns.insert( &getContext()); diff --git a/test/mlir/krnl/constant.mlir b/test/mlir/krnl/constant.mlir new file mode 100644 index 0000000..b2d501b --- /dev/null +++ b/test/mlir/krnl/constant.mlir @@ -0,0 +1,53 @@ +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s + +func @test_constant(%arg0 : tensor<1xf32>) -> tensor<*xf32> { + %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK: llvm.func @llvm.memcpy.p0i8.p0i8.i64(!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) + // CHECK: llvm.mlir.global internal constant [[GLOBAL_CONST:@.+]](dense<{{.*}}[0.000000e+00, 0.000000e+00], [1.000000e+00, 1.100000e+00], [2.000000e+00, 2.100000e+00]{{.*}}> : tensor<3x2xf32>) : !llvm<"[3 x [2 x float]]"> + // CHECK: llvm.func @test_constant({{.*}}) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { + + // CHECK: [[CONST1:%.+]] = llvm.mlir.constant(1 : i64) : !llvm.i64 + // CHECK: [[ALLOCA:%.+]] = llvm.alloca [[CONST1]] x !llvm<"[3 x [2 x float]]"> : (!llvm.i64) -> !llvm<"[3 x [2 x float]]*"> + // CHECK: [[I8ALLOCA:%.+]] = llvm.bitcast [[ALLOCA]] : !llvm<"[3 x [2 x float]]*"> to !llvm<"i8*"> + + // CHECK: [[GLOBAL_ADDR:%.+]] = llvm.mlir.addressof [[GLOBAL_CONST]] : !llvm<"[3 x [2 x float]]*"> + // CHECK: [[I8GLOBAL:%.+]] = llvm.bitcast [[GLOBAL_ADDR]] : !llvm<"[3 x [2 x float]]*"> to !llvm<"i8*"> + + /// Size of the constant tensor in bytes. 
+ // CHECK: [[CONST4:%.+]] = llvm.mlir.constant(4 : i64) : !llvm.i64 + // CHECK: [[CONST6:%.+]] = llvm.mlir.constant(6 : i64) : !llvm.i64 + // CHECK: [[CONST_MUL1:%.+]] = llvm.mul [[CONST4]], [[CONST6]] : !llvm.i64 + // CHECK: [[GLOBAL_SIZE_BYTES:%.+]] = llvm.sext [[CONST_MUL1]] : !llvm.i64 to !llvm.i64 + + /// Volatile flag + // CHECK: [[CONST0:%.+]] = llvm.mlir.constant(0 : i1) : !llvm.i1 + + // CHECK: llvm.call @llvm.memcpy.p0i8.p0i8.i64([[I8ALLOCA]], [[I8GLOBAL]], [[GLOBAL_SIZE_BYTES]], [[CONST0]]) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void + + /// Prepare data for MemRef insertion. + // CHECK: [[TYPED_ALLOCA:%.+]] = llvm.bitcast [[ALLOCA]] : !llvm<"[3 x [2 x float]]*"> to !llvm<"float*"> + + /// Insert the constant value in the local MemRef. + // CHECK: [[LOCAL_MEMREF:%.+]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: [[LOCAL_MEMREF0:%.+]] = llvm.insertvalue [[TYPED_ALLOCA]], [[LOCAL_MEMREF]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: [[LOCAL_MEMREF1:%.+]] = llvm.insertvalue [[TYPED_ALLOCA]], [[LOCAL_MEMREF0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + + /// Insert offset. + // CHECK: [[CONST00:%.+]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // CHECK: [[MEMREF1:%.+]] = llvm.insertvalue [[CONST00]], [[LOCAL_MEMREF1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + + /// Insert sizes and strides. 
+ // CHECK: [[CONST3:%.+]] = llvm.mlir.constant(3 : index) : !llvm.i64 + // CHECK: [[MEMREF2:%.+]] = llvm.insertvalue [[CONST3]], [[MEMREF1]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: [[CONST1:%.+]] = llvm.mlir.constant(2 : index) : !llvm.i64 + // CHECK: [[MEMREF3:%.+]] = llvm.insertvalue [[CONST1]], [[MEMREF2]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + + // CHECK: [[CONST2:%.+]] = llvm.mlir.constant(2 : index) : !llvm.i64 + // CHECK: [[MEMREF4:%.+]] = llvm.insertvalue [[CONST2]], [[MEMREF3]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: [[CONST1:%.+]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // CHECK: [[MEMREF5:%.+]] = llvm.insertvalue [[CONST1]], [[MEMREF4]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + + // CHECK: llvm.return [[MEMREF5]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +} diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir index 0b7451f..f1453bd 100644 --- a/test/mlir/onnx/onnx_lowering.mlir +++ b/test/mlir/onnx/onnx_lowering.mlir @@ -1678,22 +1678,7 @@ func @test_constant_dense_2d_value(%arg0: tensor<1xf32>) -> tensor<*xf32> { %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32> "std.return"(%0) : (tensor<*xf32>) -> () // CHECK-LABEL: test_constant_dense_2d_value - // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> - // CHECK: %[[INDEX_0:.+]] = constant 0 : index - // CHECK: %[[INDEX_1:.+]] = constant 1 : index - // CHECK: %[[INDEX_2:.+]] = constant 2 : index - // CHECK: [[CONSTANT_0:%.+]] = constant 0.000000e+00 : f32 - // CHECK: affine.store [[CONSTANT_0]], %0[%[[INDEX_0]], %[[INDEX_0]]] : memref<3x2xf32> - // CHECK: [[CONSTANT_1:%.+]] = constant 0.000000e+00 : f32 - // CHECK: affine.store [[CONSTANT_1]], %0[%[[INDEX_0]], %[[INDEX_1]]] : memref<3x2xf32> - // CHECK: [[CONSTANT_2:%.+]] = constant 1.000000e+00 : f32 - // CHECK: 
affine.store [[CONSTANT_2]], %0[%[[INDEX_1]], %[[INDEX_0]]] : memref<3x2xf32> - // CHECK: [[CONSTANT_3:%.+]] = constant 1.100000e+00 : f32 - // CHECK: affine.store [[CONSTANT_3]], %0[%[[INDEX_1]], %[[INDEX_1]]] : memref<3x2xf32> - // CHECK: [[CONSTANT_4:%.+]] = constant 2.000000e+00 : f32 - // CHECK: affine.store [[CONSTANT_4]], %0[%[[INDEX_2]], %[[INDEX_0]]] : memref<3x2xf32> - // CHECK: [[CONSTANT_5:%.+]] = constant 2.100000e+00 : f32 - // CHECK: affine.store [[CONSTANT_5]], %0[%[[INDEX_2]], %[[INDEX_1]]] : memref<3x2xf32> + // CHECK: [[RES:%.+]] = "krnl.global"() {name = "constant_0", shape = [3, 2], value = dense<{{.*}}[0.000000e+00, 0.000000e+00], [1.000000e+00, 1.100000e+00], [2.000000e+00, 2.100000e+00]{{.*}}> : tensor<3x2xf32>} : () -> memref<3x2xf32> // CHECK: return [[RES]] : memref<3x2xf32> }