[MLIR:HLO] Add AllGather and AllToAll operations to LMHLO dialect.
- Use a common base class to for AllReduce, AllGather, and AllToAll in the ODS spec. - Add basic verification for replica groups attribute. PiperOrigin-RevId: 354969654
This commit is contained in:
parent
7a251aaed4
commit
8e3890e8e8
|
@ -645,6 +645,15 @@ class BASE_HLO_PartitionIdOp {
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class BASE_HLO_AllGatherOp {
|
||||||
|
string summary = "AllGather operator";
|
||||||
|
|
||||||
|
string description = [{
|
||||||
|
Performs concatenation across replicas.
|
||||||
|
|
||||||
|
See https://www.tensorflow.org/xla/operation_semantics#allgather
|
||||||
|
}];
|
||||||
|
}
|
||||||
|
|
||||||
class BASE_HLO_AllReduceOp {
|
class BASE_HLO_AllReduceOp {
|
||||||
string summary = "AllReduce operator";
|
string summary = "AllReduce operator";
|
||||||
|
|
|
@ -544,9 +544,10 @@ def LHLO_ReducePrecisionOp: LHLO_Op<"reduce_precision", [SameTypeOperands]>,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
def LHLO_AllReduceOp : LHLO_Op<"all_reduce", [SameVariadicOperandSize]>,
|
// Common base class for AllReduce, AllGather, and AllToAll.
|
||||||
BASE_HLO_AllReduceOp {
|
class LHLO_CollectiveCommunicationOp<string name, list<OpTrait> traits = []> :
|
||||||
let arguments = (ins
|
LHLO_Op<name, !listconcat(traits, [SameVariadicOperandSize, SameOperandsElementType])> {
|
||||||
|
dag arguments_base = (ins
|
||||||
Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$operands,
|
Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$operands,
|
||||||
Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$results,
|
Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$results,
|
||||||
I64ElementsAttr:$replica_groups,
|
I64ElementsAttr:$replica_groups,
|
||||||
|
@ -554,14 +555,33 @@ def LHLO_AllReduceOp : LHLO_Op<"all_reduce", [SameVariadicOperandSize]>,
|
||||||
OptionalAttr<ChannelHandle>:$channel_id,
|
OptionalAttr<ChannelHandle>:$channel_id,
|
||||||
DefaultValuedAttr<BoolAttr, "false">:$use_global_device_ids
|
DefaultValuedAttr<BoolAttr, "false">:$use_global_device_ids
|
||||||
);
|
);
|
||||||
let regions = (region SizedRegion<1>:$computation);
|
|
||||||
let verifier = [{ return Verify(*this); }];
|
let verifier = [{ return Verify(*this); }];
|
||||||
let extraClassDeclaration = [{
|
let extraClassDeclaration = [{
|
||||||
// AllReduce is cross replica if channel_id is not set.
|
// AllGather is cross replica if channel_id is not set.
|
||||||
bool IsCrossReplica() { return !channel_id().hasValue(); }
|
bool IsCrossReplica() { return !channel_id().hasValue(); }
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def LHLO_AllGatherOp : LHLO_CollectiveCommunicationOp<"all_gather">,
|
||||||
|
BASE_HLO_AllGatherOp {
|
||||||
|
let arguments = !con(
|
||||||
|
arguments_base,
|
||||||
|
(ins I64Attr:$all_gather_dimension));
|
||||||
|
}
|
||||||
|
|
||||||
|
def LHLO_AllReduceOp : LHLO_CollectiveCommunicationOp<"all_reduce">,
|
||||||
|
BASE_HLO_AllReduceOp {
|
||||||
|
let arguments = arguments_base;
|
||||||
|
let regions = (region SizedRegion<1>:$computation);
|
||||||
|
}
|
||||||
|
|
||||||
|
def LHLO_AllToAllOp : LHLO_CollectiveCommunicationOp<"all_to_all">,
|
||||||
|
BASE_HLO_AllToAllOp {
|
||||||
|
let arguments = !con(
|
||||||
|
arguments_base,
|
||||||
|
(ins OptionalAttr<I64Attr>:$split_dimension));
|
||||||
|
}
|
||||||
|
|
||||||
def LHLO_CollectivePermuteOp: LHLO_Op<"collective_permute", [SameTypeOperands]>,
|
def LHLO_CollectivePermuteOp: LHLO_Op<"collective_permute", [SameTypeOperands]>,
|
||||||
BASE_HLO_CollectivePermuteOp {
|
BASE_HLO_CollectivePermuteOp {
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ limitations under the License.
|
||||||
#include "llvm/ADT/APInt.h"
|
#include "llvm/ADT/APInt.h"
|
||||||
#include "llvm/ADT/ArrayRef.h"
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/ADT/SmallSet.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/Support/FormatVariadic.h"
|
#include "llvm/Support/FormatVariadic.h"
|
||||||
|
@ -56,11 +57,64 @@ LmhloDialect::LmhloDialect(MLIRContext* context)
|
||||||
>();
|
>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies replica groups attached to collective communication operations.
|
||||||
|
// If the attribute is not empty, it must be a rank 2 tensor, and each replica
|
||||||
|
// should appear exactly once. If `is_uniform_sized` is true, then we also check
|
||||||
|
// that each group is of the same size. If the operation has
|
||||||
|
// `use_global_device_id` set, then replica group cannot be empty.
|
||||||
|
template <typename OpT>
|
||||||
|
LogicalResult VerifyReplicaGroups(OpT op, bool is_uniform_sized) {
|
||||||
|
DenseIntElementsAttr attr = op.replica_groups();
|
||||||
|
auto replica_group_type = attr.getType().dyn_cast<RankedTensorType>();
|
||||||
|
if (!replica_group_type || replica_group_type.getRank() != 2 ||
|
||||||
|
!replica_group_type.getElementType().isInteger(/*width=*/64))
|
||||||
|
return op.emitOpError(
|
||||||
|
"replica groups should be a rank 2 tensor of 64 bit integers");
|
||||||
|
|
||||||
|
if (replica_group_type.getShape().equals(ArrayRef<int64_t>{0, 0}))
|
||||||
|
return success();
|
||||||
|
|
||||||
|
int64_t max_replica_id_seen = 0;
|
||||||
|
llvm::SmallSet<int64_t, 8> replica_seen;
|
||||||
|
for (int64_t id : attr.getValues<int64_t>()) {
|
||||||
|
if (is_uniform_sized && id == -1) {
|
||||||
|
return op.emitOpError("Invalid replica id -1");
|
||||||
|
}
|
||||||
|
if (id != -1) {
|
||||||
|
if (!replica_seen.insert(id).second) {
|
||||||
|
return op.emitOpError("replica id #") << id << " seen more than once";
|
||||||
|
}
|
||||||
|
max_replica_id_seen = std::max(max_replica_id_seen, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int64_t id = 0; id <= max_replica_id_seen; id++) {
|
||||||
|
if (!replica_seen.contains(id)) {
|
||||||
|
return op.emitOpError("replica id #")
|
||||||
|
<< id << " not seen in replica groups";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return success();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(jurahul): Add verification for output shape.
|
||||||
|
static LogicalResult Verify(AllGatherOp op) {
|
||||||
|
return VerifyReplicaGroups(op, /*is_uniform_sized=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(jurahul): Add verification for output shape.
|
||||||
|
static LogicalResult Verify(AllToAllOp op) {
|
||||||
|
return VerifyReplicaGroups(op, /*is_uniform_sized=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AllReduceOp
|
// AllReduceOp
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
static LogicalResult Verify(AllReduceOp op) {
|
static LogicalResult Verify(AllReduceOp op) {
|
||||||
|
if (failed(VerifyReplicaGroups(op, /*is_uniform_sized=*/false)))
|
||||||
|
return failure();
|
||||||
|
|
||||||
// AllReduce had variadic operands and results that have the same size.
|
// AllReduce had variadic operands and results that have the same size.
|
||||||
// Each memeber of the operand should have the same type as the corresponding
|
// Each memeber of the operand should have the same type as the corresponding
|
||||||
// member of the result.
|
// member of the result.
|
||||||
|
@ -73,21 +127,6 @@ static LogicalResult Verify(AllReduceOp op) {
|
||||||
<< it.index() << " (type: " << operandType << ") and result #"
|
<< it.index() << " (type: " << operandType << ") and result #"
|
||||||
<< it.index() << " (type: " << resultType << ") to have same type";
|
<< it.index() << " (type: " << resultType << ") to have same type";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since AllReduce has a single reduction computation attached to it (which is
|
|
||||||
// applied over all the operands and results), they all need to have the same
|
|
||||||
// element type. Since we already check that each operand and corresponding
|
|
||||||
// result has the same type, its sufficient to check just the memref element
|
|
||||||
// type for each operands.
|
|
||||||
Type elementType =
|
|
||||||
op.operands().front().getType().cast<MemRefType>().getElementType();
|
|
||||||
bool allMatch = llvm::all_of(
|
|
||||||
op.operands().drop_front().getType(), [elementType](Type type) {
|
|
||||||
return type.cast<MemRefType>().getElementType() == elementType;
|
|
||||||
});
|
|
||||||
if (!allMatch)
|
|
||||||
return op.emitOpError("requires all operands to have same element type");
|
|
||||||
|
|
||||||
return success();
|
return success();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf32>) {
|
||||||
"mhlo.return"(%add) : (tensor<f32>) -> ()
|
"mhlo.return"(%add) : (tensor<f32>) -> ()
|
||||||
})
|
})
|
||||||
{channel_id = {handle = 1 : i64, type = 0 : i64}, constrain_layout = false,
|
{channel_id = {handle = 1 : i64, type = 0 : i64}, constrain_layout = false,
|
||||||
replica_groups = dense<[[0, 1, 2, 3], [5, 6, 7, 8]]> : tensor<2x4xi64>,
|
replica_groups = dense<[[0, 1, 2, 3], [5, 6, 7, 4]]> : tensor<2x4xi64>,
|
||||||
use_global_device_ids = false} : (memref<2xf32>, memref<3xf32>, memref<2xf32>, memref<2xf32>) -> ()
|
use_global_device_ids = false} : (memref<2xf32>, memref<3xf32>, memref<2xf32>, memref<2xf32>) -> ()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,7 @@ func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf32>) {
|
||||||
// -----
|
// -----
|
||||||
|
|
||||||
func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf16>) {
|
func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf16>) {
|
||||||
// expected-error@+1 {{requires all operands to have same element type}}
|
// expected-error@+1 {{requires the same element type for all operands}}
|
||||||
"lmhlo.all_reduce"(%input0, %input1, %input0, %input1) ({
|
"lmhlo.all_reduce"(%input0, %input1, %input0, %input1) ({
|
||||||
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):
|
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):
|
||||||
%add = mhlo.add %arg0, %arg1 : tensor<f32>
|
%add = mhlo.add %arg0, %arg1 : tensor<f32>
|
||||||
|
@ -32,6 +32,39 @@ func @invalid_allreduce(%input0: memref<2xf32>, %input1: memref<3xf16>) {
|
||||||
|
|
||||||
// -----
|
// -----
|
||||||
|
|
||||||
|
func @invalid_allgather(%input0: memref<2xf32>, %output: memref<8xf32>) {
|
||||||
|
// expected-error@+1 {{replica id #1 seen more than once}}
|
||||||
|
"lmhlo.all_gather"(%input0, %output)
|
||||||
|
{channel_id = {handle = 1 : i64, type = 0 : i64}, constrain_layout = false,
|
||||||
|
replica_groups = dense<[[0, 1, 1, 3], [5, 6, 7, 8]]> : tensor<2x4xi64>,
|
||||||
|
use_global_device_ids = false, all_gather_dimension = 0 : i64} : (memref<2xf32>, memref<8xf32>) -> ()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
func @invalid_alltoall(%input0: memref<2xf32>, %output: memref<8xf32>) {
|
||||||
|
// expected-error@+1 {{replica id #4 not seen in replica groups}}
|
||||||
|
"lmhlo.all_to_all"(%input0, %output)
|
||||||
|
{channel_id = {handle = 1 : i64, type = 0 : i64}, constrain_layout = false,
|
||||||
|
replica_groups = dense<[[0, 1, 2, 3], [5, 6, 7, 8]]> : tensor<2x4xi64>,
|
||||||
|
use_global_device_ids = false, all_gather_dimension = 0 : i64} : (memref<2xf32>, memref<8xf32>) -> ()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
func @invalid_alltoall(%input0: memref<2xf32>, %output: memref<8xf32>) {
|
||||||
|
// expected-error@+1 {{replica groups should be a rank 2 tensor of 64 bit integers}}
|
||||||
|
"lmhlo.all_to_all"(%input0, %output)
|
||||||
|
{channel_id = {handle = 1 : i64, type = 0 : i64}, constrain_layout = false,
|
||||||
|
replica_groups = dense<0> : tensor<1xi64>,
|
||||||
|
use_global_device_ids = false, all_gather_dimension = 0 : i64} : (memref<2xf32>, memref<8xf32>) -> ()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
// CHECK-LABEL: func @ceil
|
// CHECK-LABEL: func @ceil
|
||||||
func @ceil(%input: memref<2x2xf32>, %result: memref<2x2xf32>) {
|
func @ceil(%input: memref<2x2xf32>, %result: memref<2x2xf32>) {
|
||||||
"lmhlo.ceil"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> ()
|
"lmhlo.ceil"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> ()
|
||||||
|
|
Loading…
Reference in New Issue