From 0d7784096961b2aa239368a169e499428c5b256d Mon Sep 17 00:00:00 2001 From: Alexandre Eichenberger Date: Thu, 30 Jan 2020 14:30:28 -0500 Subject: [PATCH] Inference maxpool (#48) * first steps for shape inference of maxpool * setps forward * ongoing * working version * first steps for shape inference of maxpool * setps forward * ongoing * working version * fix errors introduced by github merge * changes suggested by Doru * updates * requested fixes * reqested changes Co-authored-by: Gheorghe-Teodor Bercea --- src/dialect/onnx/onnx.td | 2 +- src/dialect/onnx/onnx_ops.cpp | 155 ++++++++++++++++++ src/pass/shape_inference_pass.cpp | 1 + .../onnx/onnx_shape_inference_maxpool.mlir | 100 +++++++++++ 4 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 test/mlir/onnx/onnx_shape_inference_maxpool.mlir diff --git a/src/dialect/onnx/onnx.td b/src/dialect/onnx/onnx.td index 8fd0e09..340c910 100644 --- a/src/dialect/onnx/onnx.td +++ b/src/dialect/onnx/onnx.td @@ -128,7 +128,7 @@ def ONNXConvNoBiasOp:ONNX_Op<"ConvNoBias", } def ONNXMaxPoolSingleOutOp: ONNX_Op<"MaxPoolSingleOut", - [NoSideEffect]> { + [NoSideEffect, DeclareOpInterfaceMethods]> { let summary = "ONNX MaxPool operation with a single output."; let description = [{ "ONNX MaxPool operation with a single output." diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp index c2aa199..6a70b01 100644 --- a/src/dialect/onnx/onnx_ops.cpp +++ b/src/dialect/onnx/onnx_ops.cpp @@ -746,6 +746,161 @@ void ONNXConvNoBiasOp::inferShapes() { } //===----------------------------------------------------------------------===// + +// MaxPoolSingleOut + +void ONNXMaxPoolSingleOutOp::inferShapes() { + // Cannot infer shape if no shape exists. 
+ if (!X().getType().isa()) + return; + + // 1) get shape of input + auto xTy = X().getType().cast(); + auto xShape = xTy.getShape(); + auto xRank = xShape.size(); + + // 2) analyse parameters + // get kernel sizes from kernel_shape attribute + auto kernelShape = kernel_shape(); + if (!kernelShape) + emitError("kernel_shape is a mandatory attribute for which there is no default."); + auto kernelShapeArray = kernelShape.getValue(); + auto kernelRank = kernelShape.size(); + if (kernelRank > xRank) + emitError("kernel_shape spatial dimension is too large."); + auto kernelOffset = xRank - kernelRank; + + // ceil mode + auto ceilMode = ceil_mode().getSExtValue(); + + // dilatation + SmallVector actualDilations; + auto dilationsOpt = dilations(); + if (dilationsOpt.hasValue()) { + auto dilationsArray = dilationsOpt.getValue().getValue(); // opt -> attr -> array + if (dilationsArray.size() != kernelRank) + emitError("dialation rank is not the same as the spatial rank."); + // fill in the actual values + for (int i = 0; i < kernelRank; ++i) { + int64_t d = (dilationsArray[i]).cast().getInt(); + if (d < 1) + emitError("dialation value must be nonzero positive."); + actualDilations.emplace_back(d); + } + } else { + for(int i=0; i < kernelRank; ++i) { + actualDilations.emplace_back(1); + } + } + + // storage order + + // strides + SmallVector actualStrides; + auto stridesOpt = strides(); + if (stridesOpt.hasValue()) { + auto stridesArray = stridesOpt.getValue().getValue(); + if (stridesArray.size() != kernelRank) + emitError("strides rank is not the same as the spatial rank."); + // fill in the actual values + for (int i = 0; i < kernelRank; ++i) { + int64_t s = (stridesArray[i]).cast().getInt(); + if (s < 1) + emitError("strides value must be nonzero positive."); + actualStrides.emplace_back(s); + } + } else { + for(int i=0; i < kernelRank; ++i) { + actualStrides.emplace_back(1); + } + } + + // now try to find padding, getting auto_pad attribute first + auto autoPad = 
auto_pad(); + // and then investigate the various different cases + SmallVector actualPads; + auto defaultPads = false; + if (autoPad == "NOTSET") { + auto padsOpt = pads(); + if (padsOpt.hasValue()) { + auto padsArray = padsOpt.getValue().getValue(); + // pads consists of two entries for each spatial axis. + if (padsArray.size() != 2 * kernelRank) + emitError("pads rank is not twice the spatial rank."); + // fill in the actual values + for (int i = 0; i < 2*kernelRank; ++i) { + int64_t p = (padsArray[i]).cast().getInt(); + if (p < 0) + emitError("pads value must be nonnegative."); + actualPads.emplace_back(p); + } + } else { + // pads are not defined, default to value 0 + defaultPads = true; + } + } else if (autoPad == "VALID") { + defaultPads = true; + } else if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") { + // init pad with zero + for(int i=0; i<2*kernelRank; ++i) { + actualPads.emplace_back(0); + } + for(int i=0; i().getInt(); + auto dilations = actualDilations[i]; + auto strideSpatialShape = actualStrides[i]; + int64_t outputSpatialShape = ceil((1.0 * inputSpatialShape) / + (1.0 * strideSpatialShape)); + auto sumOfPad = (outputSpatialShape - 1) * strideSpatialShape + + ((kernelSpatialShape - 1) * dilations + 1) - inputSpatialShape; + actualPads[i] = actualPads[kernelRank + i] = sumOfPad / 2; + if (sumOfPad % 2 != 0) { + if (autoPad == "SAME_UPPER") { + actualPads[kernelRank + i] += 1; + } else { + actualPads[i] += 1; + } + } + } + } else { + emitError("auto_pad of unknown / unsupported value."); + } + // handle case where default pad values must be used + if (defaultPads) { + for(int i=0; i<2*kernelRank; ++i) { + actualPads.emplace_back(0); + } + } + + // initialize output shape + SmallVector yShape(xShape.begin(), xShape.end()); + // for all kernel dimensions + for(int i=0; i().getInt(); + auto dilations = actualDilations[i]; + auto strideSpatialShape = actualStrides[i]; + ///output_spatial_shape[i] = ceil( (input_spatial_shape[i] + pad_shape[i] - + 
// ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1) + double numerator = inputSpatialShape + padShape - + ((kernelSpatialShape - 1) * dilations + 1); + double denominator = strideSpatialShape; + int64_t res; + if (ceilMode) { + res = ceil(numerator / denominator) + 1; + } else { + res = floor(numerator / denominator) + 1; + } + yShape[kernelOffset + i] = res; + } + auto arrayTy = getOperand().getType().cast(); + getResult().setType(RankedTensorType::get(yShape, arrayTy.getElementType())); +} + +//===----------------------------------------------------------------------===// + // Unsqueeze void ONNXUnsqueezeOp::inferShapes() { diff --git a/src/pass/shape_inference_pass.cpp b/src/pass/shape_inference_pass.cpp index 0b5993b..8af15c9 100644 --- a/src/pass/shape_inference_pass.cpp +++ b/src/pass/shape_inference_pass.cpp @@ -112,6 +112,7 @@ public: op->getName().getStringRef() != "onnx.Xor" && op->getName().getStringRef() != "onnx.Sum" && op->getName().getStringRef() != "onnx.Max" && + op->getName().getStringRef() != "onnx.MaxPoolSingleOut" && op->getName().getStringRef() != "onnx.Min" && op->getName().getStringRef() != "onnx.Identity" && op->getName().getStringRef() != "onnx.MatMul" && diff --git a/test/mlir/onnx/onnx_shape_inference_maxpool.mlir b/test/mlir/onnx/onnx_shape_inference_maxpool.mlir new file mode 100644 index 0000000..3ebaf34 --- /dev/null +++ b/test/mlir/onnx/onnx_shape_inference_maxpool.mlir @@ -0,0 +1,100 @@ +// RUN: onnf-opt --shape-inference %s -split-input-file | FileCheck %s + +/// Test the default behavior of Max Pool with no padding (pad are set but shoudl be ignored) +func @test_default_maxpoolsingleout(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "VALID", ceil_mode = 0, kernel_shape = [3,3], pads = [1, 1, 1, 1] } : (tensor<5x5x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () +} +// CHECK-LABEL: test_default_maxpoolsingleout +// CHECK: 
[[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "VALID", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
// CHECK: return [[RES]] : tensor<5x5x30x30xf32>


/// Test the default behavior of Max Pool with no padding (pads are not set, default to zero)
func @test_default_maxpoolsingleout_defpad(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [3,3]} : (tensor<5x5x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_defpad
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
// CHECK: return [[RES]] : tensor<5x5x30x30xf32>


/// Test the behavior of Max Pool with uniform padding
func @test_default_maxpoolsingleout_pad(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [3,3], pads = [1, 1, 1, 1] } : (tensor<5x5x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_pad
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x32x32xf32>
// CHECK: return [[RES]] : tensor<5x5x32x32xf32>


/// Test the behavior of Max Pool with non-uniform padding
func @test_default_maxpoolsingleout_pad_nonunif(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [5,3], pads = [2, 1, 1, 0] } : (tensor<5x5x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_pad_nonunif
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [5, 3], pads = [2, 1, 1, 0]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x31x31xf32>
// CHECK: return [[RES]] : tensor<5x5x31x31xf32>


/// Test the behavior of Max Pool with strides
func @test_default_maxpoolsingleout_strides(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [3,3], pads = [1, 1, 1, 1], strides = [2, 2] } : (tensor<5x5x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_strides
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x16x16xf32>
// CHECK: return [[RES]] : tensor<5x5x16x16xf32>


/// Test the behavior of Max Pool with strides and non-uniform padding
func @test_default_maxpoolsingleout_strides_nonunifpad(%arg0 : tensor<5x5x30x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [2,2], pads = [1, 0, 0, 0], strides = [2, 2] } : (tensor<5x5x30x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_strides_nonunifpad
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x15x16xf32>
// CHECK: return [[RES]] : tensor<5x5x15x16xf32>


/// Test the behavior of Max Pool with strides, non-uniform padding, and ceil mode
func @test_default_maxpoolsingleout_strides_nonunifpad_ceil(%arg0 : tensor<5x5x30x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 1, kernel_shape = [2,2], pads = [1, 0, 0, 0], strides = [2, 2] } : (tensor<5x5x30x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_strides_nonunifpad_ceil
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 1 : i64, kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x16x16xf32>
// CHECK: return [[RES]] : tensor<5x5x16x16xf32>


/// Test the behavior of Max Pool with dilation
func @test_default_maxpoolsingleout_strides_dilatation(%arg0 : tensor<5x5x8x8xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0, kernel_shape = [2,2], dilations = [2, 2], strides = [3, 3] } : (tensor<5x5x8x8xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_strides_dilatation
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [2, 2], kernel_shape = [2, 2], strides = [3, 3]} : (tensor<5x5x8x8xf32>) -> tensor<5x5x2x2xf32>
// CHECK: return [[RES]] : tensor<5x5x2x2xf32>

/// Test the behavior of Max Pool with auto_pad SAME_UPPER
func @test_default_maxpoolsingleout_upper(%arg0 : tensor<5x5x16x13xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_UPPER", ceil_mode = 0, kernel_shape = [4,4], strides = [4, 4] } : (tensor<5x5x16x13xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_upper
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_UPPER", ceil_mode = 0 : i64, kernel_shape = [4, 4], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
// CHECK: return [[RES]] : tensor<5x5x4x4xf32>


/// Test the behavior of Max Pool with auto_pad SAME_LOWER
func @test_default_maxpoolsingleout_lower(%arg0 : tensor<5x5x16x13xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_LOWER", ceil_mode = 0, kernel_shape = [4,4], strides = [4, 4] } : (tensor<5x5x16x13xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
}
// CHECK-LABEL: test_default_maxpoolsingleout_lower
// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_LOWER", ceil_mode = 0 : i64, kernel_shape = [4, 4], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
// CHECK: return [[RES]] : tensor<5x5x4x4xf32>