From 66074da3ac82df25b293d1084b3795eb76b966e1 Mon Sep 17 00:00:00 2001
From: "Tung D. Le" <tung@jp.ibm.com>
Date: Sun, 20 Sep 2020 01:47:39 +0900
Subject: [PATCH] Lower ONNXConstantOfShapeOp to Krnl dialect (#296)

* Lower ONNXConstantOfShapeOp to Krnl dialect

* Change a variable name

* Add comments to lit tests

Co-authored-by: Alexandre Eichenberger <alexe@us.ibm.com>
---
 src/Conversion/ONNXToKrnl/CMakeLists.txt      |   1 +
 .../ONNXToKrnl/ConvertONNXToKrnl.cpp          |   1 +
 .../ONNXToKrnl/ONNXToKrnlCommon.hpp           |   3 +
 .../ONNXToKrnl/Tensor/ConstantOfShape.cpp     | 100 ++++++++++++++++++
 test/backend/test.py                          |  12 +++
 test/mlir/onnx/onnx_lowering.mlir             |  80 ++++++++++++++
 6 files changed, 197 insertions(+)
 create mode 100644 src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp
diff --git a/src/Conversion/ONNXToKrnl/CMakeLists.txt b/src/Conversion/ONNXToKrnl/CMakeLists.txt
index 2a99113..7684649 100644
--- a/src/Conversion/ONNXToKrnl/CMakeLists.txt
+++ b/src/Conversion/ONNXToKrnl/CMakeLists.txt
@@ -20,6 +20,7 @@ add_library(OMONNXToKrnl
         Tensor/Squeeze.cpp
         Tensor/Unsqueeze.cpp
         Tensor/Constant.cpp
+        Tensor/ConstantOfShape.cpp
         Tensor/Concat.cpp
         Tensor/Split.cpp
         Tensor/Gather.cpp
diff --git a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp
index 047f888..083202f 100644
--- a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp
+++ b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp
@@ -98,6 +98,7 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
   populateLoweringONNXTransposeOpPattern(patterns, &getContext());
   populateLoweringONNXGatherOpPattern(patterns, &getContext());
   populateLoweringONNXIdentityOpPattern(patterns, &getContext());
+  populateLoweringONNXConstantOfShapeOpPattern(patterns, &getContext());
   populateLoweringONNXConstantOpPattern(patterns, &getContext());
   populateLoweringONNXConcatOpPattern(patterns, &getContext());
   populateLoweringONNXSqueezeOpPattern(patterns, &getContext());
diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
index b5ff470..d39012e 100644
--- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
+++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
@@ -238,6 +238,9 @@ void populateLoweringONNXReshapeOpPattern(
 void populateLoweringONNXIdentityOpPattern(
     OwningRewritePatternList &patterns, MLIRContext *ctx);
 
+void populateLoweringONNXConstantOfShapeOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
 void populateLoweringONNXConstantOpPattern(
     OwningRewritePatternList &patterns, MLIRContext *ctx);
 
diff --git a/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp b/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp
new file mode 100644
index 0000000..0fc5291
--- /dev/null
+++ b/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp
@@ -0,0 +1,100 @@
+//===------------ ConstantOfShape.cpp - Lowering ConstantOfShape Op -------===//
+//
+// Copyright 2019 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file lowers the ONNX ConstantOfShape Operator to Krnl dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
+
+using namespace mlir;
+
+struct ONNXConstantOfShapeOpLowering : public ConversionPattern {
+  ONNXConstantOfShapeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            mlir::ONNXConstantOfShapeOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    ONNXConstantOfShapeOpAdaptor operandAdaptor(operands);
+
+    auto valueAttr = llvm::cast<ONNXConstantOfShapeOp>(op)
+                         .value()
+                         .getValue()
+                         .cast<DenseElementsAttr>();
+
+    auto memRefType = convertToMemRefType(*op->result_type_begin());
+    auto elementType = memRefType.getElementType();
+    size_t rank = memRefType.cast<ShapedType>().getRank();
+
+    // Allocate memory for the output.
+    Value alloc;
+    bool insertDealloc = checkInsertDealloc(op);
+    if (hasAllConstantDimensions(memRefType))
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+    else {
+      SmallVector<Value, 2> allocOperands;
+      // Load dimensions from the input.
+      for (decltype(rank) i = 0; i < rank; ++i) {
+        auto index = emitConstantOp(rewriter, loc, rewriter.getIndexType(), i);
+        auto dim =
+            rewriter.create<AffineLoadOp>(loc, operandAdaptor.input(), index);
+        auto dimIndex =
+            rewriter.create<IndexCastOp>(loc, rewriter.getIndexType(), dim);
+        allocOperands.emplace_back(dimIndex);
+      }
+      // Allocate memory.
+      alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
+      // Insert deallocation if needed.
+      if (insertDealloc) {
+        Block *parentBlock = alloc.getDefiningOp()->getBlock();
+        DeallocOp dealloc = rewriter.create<DeallocOp>(loc, alloc);
+        dealloc.getOperation()->moveBefore(&parentBlock->back());
+      }
+    }
+
+    // Get the constant value from the attribute 'value'.
+    Value constantVal;
+    if (elementType.isa<IntegerType>()) {
+      auto valueIt = valueAttr.getValues<IntegerAttr>().begin();
+      auto valueInt = (*valueIt++).cast<IntegerAttr>().getInt();
+      constantVal = emitConstantOp(rewriter, loc, elementType, valueInt);
+    } else if (elementType.isa<FloatType>()) {
+      auto valueIt = valueAttr.getValues<FloatAttr>().begin();
+      auto valueFloat = (*valueIt++).cast<FloatAttr>().getValueAsDouble();
+      constantVal = emitConstantOp(rewriter, loc, elementType, valueFloat);
+    } else {
+      llvm_unreachable("unsupported element type");
+    }
+
+    SmallVector<Value, 4> loopIVs;
+    // Create a Krnl iterate if the output is not a scalar tensor.
+    if (!hasAllScalarValues({alloc})) {
+      BuildKrnlLoop loops(rewriter, loc, rank);
+      loops.createDefineAndIterateOp(alloc);
+      Block *iterationBlock = loops.getIterateBlock();
+      // Get IVs.
+      for (auto arg : iterationBlock->getArguments())
+        loopIVs.push_back(arg);
+      // Insert instructions inside the KernelIterateOp body.
+      rewriter.setInsertionPointToStart(iterationBlock);
+    }
+
+    // Store the constant value to the output.
+    rewriter.create<AffineStoreOp>(loc, constantVal, alloc, loopIVs);
+
+    // Replace this operation with the generated alloc.
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+void populateLoweringONNXConstantOfShapeOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx) {
+  patterns.insert<ONNXConstantOfShapeOpLowering>(ctx);
+}
diff --git a/test/backend/test.py b/test/backend/test.py
index 6e58d79..94c5523 100644
--- a/test/backend/test.py
+++ b/test/backend/test.py
@@ -420,6 +420,18 @@ test_to_enable = [
     "test_split_variable_parts_2d_cpu",
     "test_split_variable_parts_default_axis_cpu",
 
+    # ConstantOfShape
+    "test_constantofshape_float_ones_cpu",
+    # Error:
+    #    Items are not equal:
+    #     ACTUAL: dtype('int32')
+    #     DESIRED: dtype('uint8')
+    # In this test, 'int32' was specified for value attribute as in
+    # onnx/onnx/backend/test/case/node/constantofshape.py
+    # and onnx-mlir correctly imported and converted the model.
+    # It is unknown why 'uint8' came from.
+    #"test_constantofshape_int_zeros_cpu",
+
     # Model
     "test_resnet50_cpu",
     "test_vgg19_cpu",
diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir
index 9e0d131..6519a42 100644
--- a/test/mlir/onnx/onnx_lowering.mlir
+++ b/test/mlir/onnx/onnx_lowering.mlir
@@ -2170,3 +2170,83 @@ func @test_gather_axis1(%arg0 : tensor<3x3xf32>) -> tensor<1x3x2xf32> {
   // CHECK: [[DATA:%.+]] = load %arg0{{.}}[[ARG1]], [[AFFINE2]]{{.}} : memref<3x3xf32>
   // CHECK: affine.store [[DATA]], [[ALLOC]]{{.}}[[ARG1]], [[ARG2]], [[ARG3]]{{.}} : memref<1x3x2xf32>
 }
+
+// -----
+
+// Check the lowering of ConstantOfShape when:
+//   - No value attribute.
+//   - The input is an empty tensor.
+// Expected emitted code:
+//   - No need a Krnl iterate.
+//   - The output is a scalar tensor.
+func @test_constant_of_shape_empty_tensor(%arg0 : tensor<0xi64>) -> tensor<*xf32> {
+  %0 = "onnx.ConstantOfShape"(%arg0) : (tensor<0xi64>) -> tensor<*xf32>
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: test_constant_of_shape_empty_tensor
+  // CHECK: [[RES:%.+]] = alloc() : memref<f32>
+  // CHECK: [[CST_VALUE:%.+]] = constant 0.000000e+00 : f32
+  // CHECK: affine.store [[CST_VALUE]], [[RES]][] : memref<f32>
+  // CHECK: return [[RES]] : memref<f32>
+}
+
+// -----
+
+// Check the lowering of ConstantOfShape when:
+//   - The input is not a constant tensor.
+// Expected emitted code:
+//   - Emit code to compute output dimensions from the input's dimensions.
+//   - Krnl iterates are used to set values to the output.
+func @test_constant_of_shape_dynamic_dims(%arg0 : tensor<3xi64>) -> tensor<*xf32> {
+  %0 = "onnx.ConstantOfShape"(%arg0) {value = dense<[1.0]> : tensor<1xf32>} : (tensor<3xi64>) -> tensor<*xf32>
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: test_constant_of_shape_dynamic_dims
+  // CHECK: [[CST0:%.+]] = constant 0 : index
+  // CHECK: [[LOAD_DIM_0:%.+]] = affine.load %arg0{{\[}}[[CST0]]{{\]}} : memref<3xi64>
+  // CHECK: [[DIM_0:%.+]] = index_cast [[LOAD_DIM_0]] : i64 to index
+  // CHECK: [[CST1:%.+]] = constant 1 : index
+  // CHECK: [[LOAD_DIM_1:%.+]] = affine.load %arg0{{\[}}[[CST1]]{{\]}} : memref<3xi64>
+  // CHECK: [[DIM_1:%.+]] = index_cast [[LOAD_DIM_1]] : i64 to index
+  // CHECK: [[CST2:%.+]] = constant 2 : index
+  // CHECK: [[LOAD_DIM_2:%.+]] = affine.load %arg0{{\[}}[[CST2]]{{\]}} : memref<3xi64>
+  // CHECK: [[DIM_2:%.+]] = index_cast [[LOAD_DIM_2]] : i64 to index
+  // CHECK: [[RES:%.+]] = alloc([[DIM_0]], [[DIM_1]], [[DIM_2]]) : memref<?x?x?xf32>
+
+  // CHECK: [[CST_VALUE:%.+]] = constant 1.000000e+00 : f32
+  // CHECK: [[LOOP_DEF:%.+]]:3 = krnl.define_loops 3
+  // CHECK: [[CST00:%.+]] = constant 0 : index
+  // CHECK: [[RES_DIM_0:%.+]] = dim [[RES]], [[CST00]] : memref<?x?x?xf32>
+  // CHECK: [[CST11:%.+]] = constant 1 : index
+  // CHECK: [[RES_DIM_1:%.+]] = dim [[RES]], [[CST11]] : memref<?x?x?xf32>
+  // CHECK: [[CST22:%.+]] = constant 2 : index
+  // CHECK: [[RES_DIM_2:%.+]] = dim [[RES]], [[CST22]] : memref<?x?x?xf32>
+  // CHECK: krnl.iterate([[LOOP_DEF]]#0, [[LOOP_DEF]]#1, [[LOOP_DEF]]#2) with ([[LOOP_DEF]]#0 -> %arg1 = 0 to [[RES_DIM_0]], [[LOOP_DEF]]#1 -> %arg2 = 0 to [[RES_DIM_1]], [[LOOP_DEF]]#2 -> %arg3 = 0 to [[RES_DIM_2]]) {
+  // CHECK:   affine.store [[CST_VALUE]], [[RES]][%arg1, %arg2, %arg3] : memref<?x?x?xf32>
+  // CHECK: }
+  // CHECK: return [[RES]] : memref<?x?x?xf32>
+}
+
+// -----
+
+// Check the lowering of ConstantOfShape when:
+//   - The input is a constant tensor.
+// Expected emitted code:
+//   - Output dimensions are computed during compilation time.
+//   - Krnl iterates are used to set values to the output.
+func @test_constant_of_shape_static_dims() -> tensor<*xf32> {
+  %0 = "onnx.Constant"() {value = dense<[3, 4, 5]> : tensor<3xi64> } : () -> tensor<3xi64>
+  %1 = "onnx.ConstantOfShape"(%0) {value = dense<[1.0]> : tensor<1xf32>} : (tensor<3xi64>) -> tensor<*xf32>
+  "std.return"(%1) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: test_constant_of_shape_static_dims
+  // CHECK: [[RES:%.+]] = alloc() : memref<3x4x5xf32>
+  // CHECK: [[GLOBAL_CST:%.+]] = "krnl.global"() {name = "constant_0", shape = [3], value = dense<[3, 4, 5]> : tensor<3xi64>} : () -> memref<3xi64>
+  // CHECK: [[CST_VALUE:%.+]] = constant 1.000000e+00 : f32
+  // CHECK: [[LOOP_DEF:%.+]]:3 = krnl.define_loops 3
+  // CHECK: krnl.iterate([[LOOP_DEF]]#0, [[LOOP_DEF]]#1, [[LOOP_DEF]]#2) with ([[LOOP_DEF]]#0 -> %arg0 = 0 to 3, [[LOOP_DEF]]#1 -> %arg1 = 0 to 4, [[LOOP_DEF]]#2 -> %arg2 = 0 to 5) {
+  // CHECK:   affine.store [[CST_VALUE]], [[RES]][%arg0, %arg1, %arg2] : memref<3x4x5xf32>
+  // CHECK: }
+  // CHECK: return [[RES]] : memref<3x4x5xf32>
+}
+