Support CHLO broadcasting operations between scalar and unranked tensors.

This is done through reshaping the unranked tensor into a 1D ranked tensor which will result in a safe broadcast/indexing logic when the other operand is a scalar. PiperOrigin-RevId: 322553661
2020-07-22 12:25:26 +00:00 · 2020-07-22 12:25:26 +00:00 · 4251630426
parent 63d62b7952
commit 4251630426
3 changed files with 144 additions and 4 deletions
--- a/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc
+++ b/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc
@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

+#include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/SCF/SCF.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/Shape/IR/Shape.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OperationSupport.h"
@ -22,6 +24,7 @@ limitations under the License.
 #include "third_party/llvm/llvm-project/mlir/include/mlir/Transforms/DialectConversion.h"
 #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
 #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"
 #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h"

 namespace mlir {
@ -74,10 +77,6 @@ struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern<ChloOpTy> {
 //   - Legal combinations of degenerate (1-dim) implicit broadcasting.
 // The restriction on broadcast_dims derives from the definition of the
 // `shape.broadcast` op, which only supports prefix-padding.
-//
-// It may be possible to expand this pattern to operate on unranked tensors in
-// the future by emitting more code to dynamically differentiate based on rank.
-// Whether that is of any practical benefit remains to be seen.
 template <typename ChloOpTy, typename HloOpTy, typename Adaptor>
 struct ConvertRankedDynamicBroadcastBinaryOp
    : public OpRewritePattern<ChloOpTy> {
@ -160,6 +159,68 @@ struct ConvertRankedDynamicBroadcastBinaryOp
  }
 };

+// Converts a broadcasting binary operation with a scalar operand and an
+// unranked operand to a ranked broadcasting operation by dynamically reshaping
+// the unranked operand to a 1D tensor. This will always be safe because
+// broadcasting from a scalar to another shape always works.
+template <typename ChloOpTy, typename HloOpTy>
+struct ConvertUnrankedScalarDynamicBroadcastBinaryOp
+    : public OpRewritePattern<ChloOpTy> {
+  using OpRewritePattern<ChloOpTy>::OpRewritePattern;
+  LogicalResult matchAndRewrite(ChloOpTy op,
+                                PatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+    Value lhs = op.lhs();
+    Value rhs = op.rhs();
+
+    auto lhs_ranked_type = lhs.getType().dyn_cast<RankedTensorType>();
+    auto lhs_unranked_type = lhs.getType().dyn_cast<UnrankedTensorType>();
+
+    auto rhs_ranked_type = rhs.getType().dyn_cast<RankedTensorType>();
+    auto rhs_unranked_type = rhs.getType().dyn_cast<UnrankedTensorType>();
+
+    bool lhs_is_scalar = lhs_ranked_type &&
+                         lhs_ranked_type.getShape().empty() &&
+                         rhs_unranked_type;
+    bool rhs_is_scalar = rhs_ranked_type &&
+                         rhs_ranked_type.getShape().empty() &&
+                         lhs_unranked_type;
+
+    // Only support the case where exactly one operand is scalar and the other
+    // is unranked. Other patterns in this file will create more efficient
+    // lowerings for cases where both ranks are known or will handle the more
+    // generic case of both inputs being unranked.
+    if (!(lhs_is_scalar ^ rhs_is_scalar)) return failure();
+
+    auto result_type = op.getResult().getType().template dyn_cast<TensorType>();
+
+    // Reshape the non-scalar value into a dynamically sized, rank-1 tensor
+    Value shape =
+        rewriter.create<shape::ShapeOfOp>(loc, lhs_is_scalar ? rhs : lhs);
+    Value num_elements = rewriter.create<shape::NumElementsOp>(loc, shape);
+    Value size = rewriter.create<shape::SizeToIndexOp>(loc, num_elements);
+    Value size_tensor = rewriter.create<TensorFromElementsOp>(loc, size);
+    Value reshaped = rewriter.create<mhlo::DynamicReshapeOp>(
+        loc, RankedTensorType::get({-1}, result_type.getElementType()),
+        lhs_is_scalar ? rhs : lhs, size_tensor);
+
+    // Create a new ranked Chlo op that will be further lowered by other
+    // patterns into Mhlo.
+    SmallVector<Value, 2> operands{lhs_is_scalar ? lhs : reshaped,
+                                   rhs_is_scalar ? rhs : reshaped};
+    Value computed = rewriter.create<ChloOpTy>(
+        loc, SmallVector<Type, 1>{reshaped.getType()}, operands, op.getAttrs());
+
+    // Reshape the result back into an unranked tensor.
+    Value shape_tensor = rewriter.create<shape::ToExtentTensorOp>(
+        loc, RankedTensorType::get({-1}, rewriter.getIndexType()), shape);
+    rewriter.replaceOpWithNewOp<mhlo::DynamicReshapeOp>(op, result_type,
+                                                        computed, shape_tensor);
+
+    return success();
+  }
+};
+
 template <typename ChloOpTy, typename HloOpTy, typename Adaptor>
 void PopulateForBinaryOp(MLIRContext *context,
                         OwningRewritePatternList *patterns) {
@ -169,6 +230,9 @@ void PopulateForBinaryOp(MLIRContext *context,
  patterns->insert<
      ConvertRankedDynamicBroadcastBinaryOp<ChloOpTy, HloOpTy, Adaptor>>(
      context, 5);
+  patterns->insert<
+      ConvertUnrankedScalarDynamicBroadcastBinaryOp<ChloOpTy, HloOpTy>>(
+      context);
 }

 template <typename FromOpTy, typename ToOpTy>
--- a/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc
+++ b/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc
@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

+#include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/SCF/SCF.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/Shape/IR/Shape.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h"
 #include "third_party/llvm/llvm-project/mlir/include/mlir/Pass/Pass.h"
@ -37,6 +38,7 @@ struct TestChloLegalizeToHloPass
    // The conversion uses helpers from the Standard dialect.
    conversionTarget.addLegalDialect<mlir::StandardOpsDialect>();
    conversionTarget.addLegalDialect<mlir::shape::ShapeDialect>();
+    conversionTarget.addLegalDialect<mlir::scf::SCFDialect>();

    PopulateLegalizeChloToHloPatterns(&getContext(), &conversionPatterns);

--- a/tests/chlo_legalize_to_hlo_broadcasts.mlir
+++ b/tests/chlo_legalize_to_hlo_broadcasts.mlir
@ -237,3 +237,77 @@ func @xorWithoutBroadcast(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4x
  %0 = chlo.broadcast_xor %arg0, %arg1 : (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1>
  return %0 : tensor<4xi1>
 }
+
+// -----
+func @addScalarUnranked(%arg0: tensor<f32>, %arg1: tensor<*xf32>) -> tensor<*xf32> {
+  %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<f32>, tensor<*xf32>)
+                                         -> tensor<*xf32>
+  return %0 : tensor<*xf32>
+}
+
+// CHECK-LABEL:   func @addScalarUnranked(
+// CHECK-SAME:                            %[[ARG_0:.*]]: tensor<f32>,
+// CHECK-SAME:                            %[[ARG_1:.*]]: tensor<*xf32>
+// CHECK-SAME:                            ) -> tensor<*xf32> {
+//                  First handle the dynamic reshaping of the unranked operand
+//                  to a 1D tensor.
+// CHECK:           %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor<*xf32>
+// CHECK:           %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_1]]
+// CHECK:           %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]]
+// CHECK:           %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex>
+// CHECK:           %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_1]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor<?xf32>
+//                  The assuming region is part of the second stage of lowering
+//                  with ranked broadcasting logic.
+// CHECK:           %[[SHAPE_0:.*]] = shape.shape_of %[[ARG_0]] : tensor<f32>
+// CHECK:           %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor<?xf32>
+// CHECK:           %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_0]], %[[SHAPE_RESHAPED]]
+// CHECK:           %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor<?xf32>) {
+// CHECK:             %[[SCALAR_SHAPE:.*]] = shape.const_shape []
+// CHECK:             %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]]
+// CHECK:             %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : tensor<1xindex>
+// CHECK:             %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>, tensor<1xindex>) -> tensor<?xf32>
+// CHECK:             %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<?xf32>, tensor<1xindex>) -> tensor<?xf32>
+// CHECK:             %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor<?xf32>
+// CHECK:             shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor<?xf32>
+// CHECK:           }
+//                  As part of the unranked logic, the result is reshaped back
+//                  to an unranked tensor.
+// CHECK:           %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_1]] : tensor<?xindex>
+// CHECK:           %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor<?xf32>, tensor<?xindex>) -> tensor<*xf32>
+// CHECK:           return %[[RESHAPED_RESULT]] : tensor<*xf32>
+// CHECK:         }
+
+// -----
+func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor<f32>) -> tensor<*xf32> {
+  %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<*xf32>, tensor<f32>)
+                                         -> tensor<*xf32>
+  return %0 : tensor<*xf32>
+}
+// CHECK-LABEL:   func @addUnrankedScalar(
+// CHECK-SAME:                            %[[ARG_0:.*]]: tensor<*xf32>,
+// CHECK-SAME:                            %[[ARG_1:.*]]: tensor<f32>) -> tensor<*xf32> {
+//                  First handle the dynamic reshaping of the unranked operand
+//                  to a 1D tensor.
+// CHECK:           %[[SHAPE_0:.*]] = shape.shape_of %[[ARG_0]] : tensor<*xf32>
+// CHECK:           %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_0]]
+// CHECK:           %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]]
+// CHECK:           %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex>
+// CHECK:           %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_0]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor<?xf32>
+//                  The assuming region is part of the second stage of lowering
+//                  with ranked broadcasting logic.
+// CHECK:           %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor<?xf32>
+// CHECK:           %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor<f32>
+// CHECK:           %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]]
+// CHECK:           %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor<?xf32>) {
+// CHECK:             %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] : tensor<1xindex>
+// CHECK:             %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<?xf32>, tensor<1xindex>) -> tensor<?xf32>
+// CHECK:             %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>, tensor<1xindex>) -> tensor<?xf32>
+// CHECK:             %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor<?xf32>
+// CHECK:             shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor<?xf32>
+// CHECK:           }
+//                  As part of the unranked logic, the result is reshaped back
+//                  to an unranked tensor.
+// CHECK:           %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_0]] : tensor<?xindex>
+// CHECK:           %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor<?xf32>, tensor<?xindex>) -> tensor<*xf32>
+// CHECK:           return %[[RESHAPED_RESULT]] : tensor<*xf32>
+// CHECK:         }