[HLO] Add AllReduceScatter to MHLO and LMHLO dialects.

PiperOrigin-RevId: 379296198
This commit is contained in:
Rahul Joshi 2021-06-14 09:36:23 -07:00 committed by TensorFlow MLIR Team
parent dbfa4b1537
commit a6011d0279
8 changed files with 220 additions and 0 deletions

View File

@@ -954,6 +954,26 @@ def HLO_AllReduceOp : HLO_Op<"all_reduce",
  let hasCustomHLOConverter = 1;
}

def HLO_AllReduceScatterOp : HLO_Op<"all_reduce_scatter",
    [SameOperandsAndResultElementType]> {
  let summary = "AllReduceScatter operator";
  let description = [{
    Performs all_reduce followed by a scatter.
    See https://www.tensorflow.org/xla/operation_semantics#allreducescatter
  }];
  let arguments = (ins
    HLO_Tensor:$operand,
    I64Attr:$scatter_dimension,
    I64ElementsAttr:$replica_groups,
    OptionalAttr<ChannelHandle>:$channel_handle
  );
  let regions = (region SizedRegion<1>:$computation);
  let results = (outs HLO_Tensor);
  let hasCustomHLOConverter = 1;
}
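For quick reference, a minimal usage sketch of the new MHLO op, mirroring the positive test added later in this commit (value names are illustrative; the optional channel_handle is omitted). With one replica group of four replicas and scatter_dimension = 1, a 4x16 operand is reduced across the group and its scatter dimension is split four ways, giving a 4x4 result per replica:

  %result = "mhlo.all_reduce_scatter"(%operand) ( {
    // reduction computation applied across replicas
    ^bb0(%lhs: tensor<f32>, %rhs: tensor<f32>):
    %sum = mhlo.add %lhs, %rhs : tensor<f32>
    "mhlo.return"(%sum) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<4x4xf32>
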
def HLO_AllToAllOp : HLO_Op<"all_to_all",
    [NoSideEffect, SameOperandsElementType, SameOperandsShape]> {

View File

@@ -30,6 +30,10 @@ namespace hlo {
LogicalResult VerifyCollectivePermuteSourceTargetPairs(
    Operation* op, DenseIntElementsAttr attr);

LogicalResult VerifyAllReduceScatter(Operation* op, TypeRange operand_types,
                                     TypeRange result_types,
                                     uint64_t scatter_dimension);

// Custom formatting for convolution window attributes.
void printWindowAttributes(OpAsmPrinter& p, Operation* op,
                           llvm::Optional<DenseIntElementsAttr> window_strides,

View File

@@ -1105,6 +1105,19 @@ def LHLO_AllReduceOp : LHLO_CollectiveCommunicationOp<"all_reduce", [SameOperand
  let regions = (region SizedRegion<1>:$computation);
}

def LHLO_AllReduceScatterOp : LHLO_CollectiveCommunicationOp<"all_reduce_scatter", [SameOperandsElementType]> {
  let summary = "AllReduceScatter operator";
  let description = [{
    Performs all_reduce followed by a scatter.
    See https://www.tensorflow.org/xla/operation_semantics#allreducescatter
  }];
  let arguments = !con(
    arguments_base,
    (ins I64Attr:$scatter_dimension));
  let regions = (region SizedRegion<1>:$computation);
}
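The bufferized LMHLO form is analogous but takes the destination buffer as an extra operand and produces no results; a sketch mirroring the LMHLO test added later in this commit (value names are illustrative):

  "lmhlo.all_reduce_scatter"(%operand, %result) ( {
    ^bb0(%lhs: tensor<f32>, %rhs: tensor<f32>):
    %sum = mhlo.add %lhs, %rhs : tensor<f32>
    "mhlo.return"(%sum) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (memref<4x16xf32>, memref<4x4xf32>) -> ()
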
def LHLO_AllToAllOp : LHLO_CollectiveCommunicationOp<"all_to_all", [SameOperandsElementType]> {
  let arguments = !con(
    arguments_base,

View File

@@ -196,6 +196,18 @@ Value MaybeCastTo(OpBuilder& b, Location loc, Value value, Type type) {
}  // namespace

//===----------------------------------------------------------------------===//
// AllReduceScatterOp
//===----------------------------------------------------------------------===//

static LogicalResult Verify(AllReduceScatterOp op) {
  return mlir::hlo::VerifyAllReduceScatter(
      op,
      /*operand_types=*/{op.operand().getType()},
      /*result_types=*/{op.getType()},
      /*scatter_dimension=*/op.scatter_dimension());
}

//===----------------------------------------------------------------------===//
// ConstOp
//===----------------------------------------------------------------------===//

View File

@@ -53,6 +53,51 @@ LogicalResult VerifyCollectivePermuteSourceTargetPairs(
  return success();
}

LogicalResult VerifyAllReduceScatter(Operation *op, TypeRange operand_types,
                                     TypeRange result_types,
                                     uint64_t scatter_dimension) {
  // If operand and result are both ranked, then the size of the scatter
  // dimension in the operand should be a multiple of the size of the scatter
  // dimension in the result.
  for (auto it : llvm::zip(operand_types, result_types)) {
    auto operand_type = std::get<0>(it).cast<ShapedType>();
    auto result_type = std::get<1>(it).cast<ShapedType>();
    if (!operand_type.hasRank() || !result_type.hasRank()) continue;
    if (operand_type.getRank() != result_type.getRank())
      return op->emitOpError() << "operand and result should have same rank";
    if (scatter_dimension >= operand_type.getRank())
      return op->emitOpError()
             << "scatter dim should be less than operand/result rank";
    if (operand_type.isDynamicDim(scatter_dimension) ||
        result_type.isDynamicDim(scatter_dimension))
      continue;
    if (operand_type.getDimSize(scatter_dimension) == 0)
      return op->emitOpError() << "operand scatter dimension cannot be zero";
    if (result_type.getDimSize(scatter_dimension) == 0)
      return op->emitOpError() << "result scatter dimension cannot be zero";
    if ((operand_type.getDimSize(scatter_dimension) %
         result_type.getDimSize(scatter_dimension)) != 0)
      return op->emitOpError()
             << "operand scatter dimension has size "
             << operand_type.getDimSize(scatter_dimension)
             << ", expected to be a multiple of result scatter dimension size "
             << result_type.getDimSize(scatter_dimension);

    // Non scatter dimensions should be equal.
    for (uint64_t index : llvm::seq<uint64_t>(0, operand_type.getRank())) {
      if (index == scatter_dimension || operand_type.isDynamicDim(index) ||
          result_type.isDynamicDim(index))
        continue;
      if (operand_type.getDimSize(index) != result_type.getDimSize(index))
        return op->emitOpError()
               << "non scatter dimensions should be same for operand ("
               << operand_type.getDimSize(index) << ") and result ("
               << result_type.getDimSize(index) << ")";
    }
  }
  return success();
}
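As a worked example of these checks (exercised by the MHLO tests added below): with scatter_dimension = 1, an operand of type tensor<4x16xf32> and a result of type tensor<4x4xf32> pass, since the ranks match, 16 is a multiple of 4, and the non-scatter dimension (4) agrees on both sides; a tensor<4x5xf32> result is rejected because 16 is not a multiple of 5. Note that this helper only enforces divisibility and equality of the non-scatter dimensions; it does not relate the quotient to the replica group size.
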
namespace {
// Custom formatting for convolution window attributes.
void printWindowAttribute(OpAsmPrinter &p, DenseElementsAttr attribute) {

View File

@@ -159,6 +159,21 @@ static LogicalResult Verify(AllReduceOp op) {
  return success();
}

//===----------------------------------------------------------------------===//
// AllReduceScatterOp
//===----------------------------------------------------------------------===//

static LogicalResult Verify(AllReduceScatterOp op) {
  if (failed(VerifyReplicaGroups(op, /*is_uniform_sized=*/true)))
    return failure();
  if (failed(mlir::hlo::VerifyAllReduceScatter(
          op, /*operand_types=*/op.operands().getTypes(),
          /*result_types=*/op.results().getTypes(),
          /*scatter_dimension=*/op.scatter_dimension())))
    return failure();
  return success();
}

//===----------------------------------------------------------------------===//
// CaseOp
//===----------------------------------------------------------------------===//

View File

@@ -32,6 +32,19 @@ func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf16>) {

// -----

// CHECK-LABEL: func @reduce_scatter
func @reduce_scatter(%data: memref<4x16xf32>, %result: memref<4x4xf32>) {
  "lmhlo.all_reduce_scatter"(%data, %result) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (memref<4x16xf32>, memref<4x4xf32>) -> ()
  return
}
// -----

// CHECK-LABEL: func @mixed_types_allgather
func @mixed_types_allgather(%a0: memref<1x1xf32>, %a1: memref<1x1xi32>) {
  "lmhlo.all_gather"(%a0, %a1, %a0, %a1) {all_gather_dimension = 0 : i64,

View File

@@ -13,6 +13,104 @@ func private @invalid_type() -> !mhlo.foobar

// -----

// CHECK-LABEL: func @reduce_scatter
func @reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4x4xf32> {
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<4x4xf32>
  return %0 : tensor<4x4xf32>
}
// -----

func @invalid_reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4x5xf32> {
  // expected-error@+1 {{operand scatter dimension has size 16, expected to be a multiple of result scatter dimension size 5}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<4x5xf32>
  return %0 : tensor<4x5xf32>
}

// -----

func @invalid_reduce_scatter(%data: tensor<4x0xf32>) -> tensor<4x4xf32> {
  // expected-error@+1 {{operand scatter dimension cannot be zero}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x0xf32>) -> tensor<4x4xf32>
  return %0 : tensor<4x4xf32>
}

// -----

func @invalid_reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4x0xf32> {
  // expected-error@+1 {{result scatter dimension cannot be zero}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<4x0xf32>
  return %0 : tensor<4x0xf32>
}

// -----

func @invalid_reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4xf32> {
  // expected-error@+1 {{operand and result should have same rank}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<4xf32>
  return %0 : tensor<4xf32>
}

// -----

func @invalid_reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4x4xf32> {
  // expected-error@+1 {{scatter dim should be less than operand/result rank}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 4 : i64} : (tensor<4x16xf32>) -> tensor<4x4xf32>
  return %0 : tensor<4x4xf32>
}

// -----

func @invalid_reduce_scatter(%data: tensor<4x16xf32>) -> tensor<3x4xf32> {
  // expected-error@+1 {{non scatter dimensions should be same for operand (4) and result (3)}}
  %0 = "mhlo.all_reduce_scatter"(%data) ( {
    // reduction computation
    ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
    %1 = mhlo.add %arg2, %arg3 : tensor<f32>
    "mhlo.return"(%1) : (tensor<f32>) -> ()
  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
      scatter_dimension = 1 : i64} : (tensor<4x16xf32>) -> tensor<3x4xf32>
  return %0 : tensor<3x4xf32>
}
// -----

// CHECK-LABEL: func @alltoall
func @alltoall(%data: tensor<4x16xf32>) -> tensor<16x4xf32> {
  %0 = "mhlo.all_to_all"(%data) {