From e8a0b47e1078414833c3631416798955c9b2358a Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gt.bercea@gmail.com>
Date: Tue, 10 Mar 2020 16:58:05 -0400
Subject: [PATCH] Fix case for upper and lower padding when strides are
 present. (#11)

* Fix case for upper and lower padding when strides are present.

* Address comments.

* Code clean-up.

* Fix tests.
---
 src/dialect/onnx/onnx_ops.cpp            | 99 +++++++++++++++++-------
 test/mlir/onnx/onnx_shape_inference.mlir | 35 +++++----
 2 files changed, 90 insertions(+), 44 deletions(-)
diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp
index 18768b7..0f6a3f3 100644
--- a/src/dialect/onnx/onnx_ops.cpp
+++ b/src/dialect/onnx/onnx_ops.cpp
@@ -782,15 +782,15 @@ void ONNXConvNoBiasOp::inferShapes() {
 
   auto dataTy = X().getType().cast<RankedTensorType>();
   auto weightTy = W().getType().cast<RankedTensorType>();
-  auto dataShape = dataTy.getShape();
+  auto inDataShape = dataTy.getShape();
   auto weightShape = weightTy.getShape();
 
   // Lowest supported convolution is a one dimensional convolution.
-  if (dataShape.size() < 3)
+  if (inDataShape.size() < 3)
     emitError("Data input shape must be at least (NxCxD1)");
 
   // Check that shape of weight and data have same length.
-  if (dataShape.size() != weightShape.size())
+  if (inDataShape.size() != weightShape.size())
     emitError("Weight size not compatible with data size");
 
   // Required attribute auto_pad defaults to NOTSET.
@@ -799,8 +799,8 @@ void ONNXConvNoBiasOp::inferShapes() {
   int64_t group =
       ONNXConvNoBiasOp::group().getSExtValue(); //.getLimitedValue();
   // Check that the X.shape[1] == (W.shape[1] * group) == C condition holds.
-  if (dataShape[1] != -1 && weightShape[1] != -1 &&
-      dataShape[1] != (weightShape[1] * group))
+  if (inDataShape[1] != -1 && weightShape[1] != -1 &&
+      inDataShape[1] != (weightShape[1] * group))
     emitError("Channel dimension mismatch");
 
   // Note: the value of the group attribut only impacts the way the
@@ -811,7 +811,7 @@ void ONNXConvNoBiasOp::inferShapes() {
   //
   SmallVector<int64_t, 2> dims;
   // Insert batch size.
-  dims.emplace_back(dataShape[0]);
+  dims.emplace_back(inDataShape[0]);
   // Insert number of filters being applied (number of output channels).
   dims.emplace_back(weightShape[0]);
 
@@ -821,22 +821,22 @@ void ONNXConvNoBiasOp::inferShapes() {
   //
   SmallVector<int64_t, 2> outSpatialDims;
   // Number of spatial dimensions.
-  int32_t nDims = dataShape.size() - 2;
+  int32_t nSpatialDims = inDataShape.size() - 2;
 
   // Initialize dimenions based on the input spatial dimensions.
-  for (int i = 2; i < dataShape.size(); ++i)
-    outSpatialDims.emplace_back(dataShape[i]);
+  for (int i = 2; i < inDataShape.size(); ++i)
+    outSpatialDims.emplace_back(inDataShape[i]);
 
   // Use kernel_shape attribute if present otherwise use size from weight
   // argument.
   SmallVector<int64_t, 2> kernelDims;
   if (auto kernelShape = kernel_shapeAttr()) {
-    if (ArrayAttrSize(kernelShape) != nDims)
+    if (ArrayAttrSize(kernelShape) != nSpatialDims)
       emitError("kernel_shape length incompatible with spatial dimensions");
-    for (int i = 0; i < nDims; ++i)
+    for (int i = 0; i < nSpatialDims; ++i)
       kernelDims.emplace_back(ArrayAttrIntVal(kernelShape, i));
   } else {
-    for (int i = 0; i < nDims; ++i)
+    for (int i = 0; i < nSpatialDims; ++i)
       kernelDims.emplace_back(weightShape[i + 2]);
   }
 
@@ -852,43 +852,70 @@ void ONNXConvNoBiasOp::inferShapes() {
   // From a dimensionality perspective the kernel size becomes the dilated
   // kernel size.
   if (auto dilations = dilationsAttr()) {
-    if (ArrayAttrSize(dilations) != nDims)
+    if (ArrayAttrSize(dilations) != nSpatialDims)
       emitError("dilations length incompatible with spatial dimensions");
-    for (int i = 0; i < nDims; ++i)
+    for (int i = 0; i < nSpatialDims; ++i)
       kernelDims[i] =
-          (kernelDims[i] + 1) * ArrayAttrIntVal(dilations, i)  -        1;
+          (kernelDims[i] + 1) * ArrayAttrIntVal(dilations, i) - 1;
   }
 
   // Subtract kernel dimensions from input data dimensions.
-  for (int i = 0; i < nDims; ++i)
+  for (int i = 0; i < nSpatialDims; ++i)
     outSpatialDims[i] -= kernelDims[i];
 
+  // Array which holds the padding information.
+  SmallVector<int64_t, 2> actualPads(2 * nSpatialDims, 0);
+  auto stridesAttr = ONNXConvNoBiasOp::stridesAttr();
+
   // Add padding information.
   if (autoPad == "NOTSET") {
     // Use pads to to determine the padding. If attribute is not
     // present then pads is considered to be all zeros (no padding).
     if (auto pads = padsAttr()) {
       // pads consists of two entries for each spatial axis.
-      if (ArrayAttrSize(pads) != 2 * nDims)
+      if (ArrayAttrSize(pads) != 2 * nSpatialDims)
         emitError("pads size is not twice the spatial size");
 
-      for (int i = 0; i < nDims; ++i) {
+      for (int i = 0; i < nSpatialDims; ++i) {
         // Padding for beginning of axis.
         outSpatialDims[i] += ArrayAttrIntVal(pads, i);
         // Padding for end of axis.
-        outSpatialDims[i] += ArrayAttrIntVal(pads, i + nDims);
+        outSpatialDims[i] += ArrayAttrIntVal(pads, i + nSpatialDims);
       }
     }
   } else if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") {
     // Pad input so that output size matches input size.
     // Each spatial dimension needs to be padded by a total of:
     //
-    // K - 1
+    // stride * (InDim - 1) + K - InDim   (InDim = input spatial dimension)
     //
     // where K is a kernel spatial dimension.
-    // Pad as if stride is 1.
-    for (int i = 0; i < nDims; ++i)
-      outSpatialDims[i] += kernelDims[i] - 1;
+    for (int i = 0; i < nSpatialDims; ++i) {
+      // If strides are given use them otherwise stride is 1.
+      int64_t stride = 1;
+      if (stridesAttr)
+        stride = ArrayAttrIntVal(stridesAttr, i);
+
+      // Compute necessary padding. The input dimensions are stored in
+      // inDataShape.
+      int64_t totalPadding = stride * (inDataShape[i + 2] - 1) +
+          kernelDims[i] - inDataShape[i + 2];
+
+      // Adjust current output value with the value of the padding.
+      // When dividing by stride later on, the output dimension should
+      // be equal to the input dimension.
+      outSpatialDims[i] += totalPadding;
+
+      // Record the upper and lower axis padding.
+      actualPads[i] = actualPads[i + nSpatialDims] = totalPadding / 2;
+      if (totalPadding % 2 != 0) {
+        if (autoPad == "SAME_LOWER") {
+          actualPads[i]++;
+        } else {
+          actualPads[i + nSpatialDims]++;
+        }
+      }
+    }
   } else if (autoPad == "VALID") {
     // No padding
   } else {
@@ -896,18 +923,34 @@ void ONNXConvNoBiasOp::inferShapes() {
   }
 
   // Strides
-  if (auto strides = ONNXConvNoBiasOp::stridesAttr()) {
-    if (ArrayAttrSize(strides) != nDims)
+  if (stridesAttr) {
+    if (ArrayAttrSize(stridesAttr) != nSpatialDims)
       emitError("strides length incompatible with spatial dimensions");
-    for (int i = 0; i < nDims; ++i) {
-      int64_t stride = ArrayAttrIntVal(strides, i);
+    for (int i = 0; i < nSpatialDims; ++i) {
+      int64_t stride = ArrayAttrIntVal(stridesAttr, i);
       outSpatialDims[i] = floor(outSpatialDims[i] / stride);
     }
   }
 
-  for (int i = 0; i < nDims; ++i)
+  for (int i = 0; i < nSpatialDims; ++i)
     outSpatialDims[i] += 1;
 
+  // Check input and output sizes match.
+  if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") {
+    for (int i = 0; i < nSpatialDims; ++i)
+      if (outSpatialDims[i] != inDataShape[i + 2])
+        emitError("input and output spatial dimension mismatch");
+
+    // Set pads values in attributes.
+    auto builder = mlir::Builder(this->getContext());
+    ArrayRef<int64_t> defaultRefs(actualPads);
+    padsAttr(builder.getI64ArrayAttr(defaultRefs));
+
+    // Change auto padding attribute to NOTSET since padding values
+    // are now explicitly included in the operation.
+    auto_padAttr(builder.getStringAttr("NOTSET"));
+  }
+
   dims.append(outSpatialDims.begin(), outSpatialDims.end());
   getResult().setType(RankedTensorType::get(dims, dataTy.getElementType()));
 }
diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir
index f3c82eb..1204dc3 100644
--- a/test/mlir/onnx/onnx_shape_inference.mlir
+++ b/test/mlir/onnx/onnx_shape_inference.mlir
@@ -195,7 +195,7 @@ func @test_conv_no_bias_4(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10
   "std.return"(%0) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_conv_no_bias_4
-  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
+  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [2, 4, 3, 5]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
   // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
 }
 
@@ -204,7 +204,7 @@ func @test_conv_no_bias_5(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x10
   "std.return"(%0) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_conv_no_bias_5
-  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_LOWER", group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
+  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [3, 5, 2, 4]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x10xf32>) -> tensor<1x5x32x64xf32>
   // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
 }
 
@@ -238,8 +238,8 @@ func @test_conv_no_bias_8(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7x
   "std.return"(%0) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_conv_no_bias_8
-  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x16x22xf32>
-  // CHECK: return [[RES_ATTR]] : tensor<1x5x16x22xf32>
+  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [18, 66, 18, 66], strides = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x32x64xf32>
+  // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
 }
 
 /// dilations attribute.
@@ -269,27 +269,30 @@ func @test_conv_no_bias_10(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7
 func @test_conv_no_bias_11(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x7xf32>) -> tensor<*xf32> {
   %0 = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", group = 1 : i64, dilations = [2, 3]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
-}
+
   // CHECK-LABEL: test_conv_no_bias_11
-  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "SAME_UPPER", dilations = [2, 3], group = 1 : i64} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x32x64xf32>
+  // CHECK: [[RES_ATTR:%.+]] = "onnx.ConvNoBias"(%arg0, %arg1) {auto_pad = "NOTSET", dilations = [2, 3], group = 1 : i64, pads = [6, 11, 6, 11]} : (tensor<1x2x32x64xf32>, tensor<5x2x6x7xf32>) -> tensor<1x5x32x64xf32>
   // CHECK: return [[RES_ATTR]] : tensor<1x5x32x64xf32>
+}
 
+/// Test PadConstantValuePad
 
-/// Test PadConstantValuePad_1
 func @test_PadConstantValuePad_1(%arg0 : tensor<16x13xf32>) -> tensor<*xf32> {
   %0 = "onnx.PadConstantValuePad"(%arg0) {constant_value = 0.000000e+00 : f32, mode = "constant", pads = [0, 2, 0, 0]} : (tensor<16x13xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
-}
-// CHECK-LABEL: test_PadConstantValuePad_1
-// CHECK: [[RES:%.+]] = "onnx.PadConstantValuePad"(%arg0) {constant_value = 0.000000e+00 : f32, mode = "constant", pads = [0, 2, 0, 0]} : (tensor<16x13xf32>) -> tensor<18x13xf32>
-// CHECK: return [[RES]] : tensor<18x13xf32>
 
-/// Test PadConstantPad_1
+  // CHECK-LABEL: test_PadConstantValuePad_1
+  // CHECK: [[RES:%.+]] = "onnx.PadConstantValuePad"(%arg0) {constant_value = 0.000000e+00 : f32, mode = "constant", pads = [0, 2, 0, 0]} : (tensor<16x13xf32>) -> tensor<18x13xf32>
+  // CHECK: return [[RES]] : tensor<18x13xf32>
+}
+
+/// Test PadConstantPad
+
 func @test_PadConstantPad_1(%arg0 : tensor<16x13xf32>, %arg1 : tensor<*xf32>) -> tensor<*xf32> {
   %0 = "onnx.PadConstantPad"(%arg0, %arg1) {mode = "constant", pads = [0, 2, 3, 1]} : (tensor<16x13xf32>, tensor<*xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
-}
-// CHECK-LABEL: test_PadConstantPad_1
-// CHECK: [[RES:%.+]] = "onnx.PadConstantPad"(%arg0, %arg1) {mode = "constant", pads = [0, 2, 3, 1]} : (tensor<16x13xf32>, tensor<*xf32>) -> tensor<18x17xf32>
-// CHECK: return [[RES]] : tensor<18x17xf32>
 
+  // CHECK-LABEL: test_PadConstantPad_1
+  // CHECK: [[RES:%.+]] = "onnx.PadConstantPad"(%arg0, %arg1) {mode = "constant", pads = [0, 2, 3, 1]} : (tensor<16x13xf32>, tensor<*xf32>) -> tensor<18x17xf32>
+  // CHECK: return [[RES]] : tensor<18x17xf32>
+}