[XLA:GPU] Add layout attributes to LHLO_GPU Convolution operations.
- MLIR MemRefs do not preserve layout information correctly when unit dimensions are involved. However, operations like convolution that use cuDNN need the correct layout to be preserved, so that we do not end up creating an incompatible combination of input/filter/output layouts that is not supported by cuDNN. - Add these layouts to the convolution backend config attributes in the form of I64ArrayAttr, representing the layout in "minor_to_major" form similar to XLA. PiperOrigin-RevId: 348034757
This commit is contained in:
parent
dc7e63f74c
commit
8134bff98d
|
@ -21,7 +21,17 @@ include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_base.td"
|
||||||
def ConvolutionBackendConfigAttr : StructAttr<"ConvolutionBackendConfig",
|
def ConvolutionBackendConfigAttr : StructAttr<"ConvolutionBackendConfig",
|
||||||
LHLO_GPU_Dialect, [
|
LHLO_GPU_Dialect, [
|
||||||
StructFieldAttr<"algorithm", I64Attr>,
|
StructFieldAttr<"algorithm", I64Attr>,
|
||||||
StructFieldAttr<"tensor_ops_enabled", BoolAttr>]> {
|
StructFieldAttr<"tensor_ops_enabled", BoolAttr>,
|
||||||
|
// The following 3 attributes describe the layout as an array of integers
|
||||||
|
// that list the dimensions in minor-to-major order similar to XLA's layout
|
||||||
|
// representation. operand_0_layout and operand_1_layout describe the layout
|
||||||
|
// of the first 2 operands of the convolution, and result_layout describes
|
||||||
|
// the layout of the primary output operand of the convolution.
|
||||||
|
// Note: Not using names like input_layout or filter_layout as `input` may be
|
||||||
|
// an input operand (for ConvForward) but output for ConvBackward.
|
||||||
|
StructFieldAttr<"operand_0_layout", I64ArrayAttr>,
|
||||||
|
StructFieldAttr<"operand_1_layout", I64ArrayAttr>,
|
||||||
|
StructFieldAttr<"result_layout", I64ArrayAttr>]> {
|
||||||
let description = "GPU Convolution backend configuration";
|
let description = "GPU Convolution backend configuration";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,8 +50,11 @@ func @conv_forward(%input : memref<1x1x8x8xf16>, %filter: memref<1x1x2x2xf16>, %
|
||||||
feature_group_count = 1,
|
feature_group_count = 1,
|
||||||
batch_group_count = 1,
|
batch_group_count = 1,
|
||||||
result_scale = 1.0,
|
result_scale = 1.0,
|
||||||
backend_config = {algorithm=0, tensor_ops_enabled = true }
|
backend_config = {algorithm=0,
|
||||||
}
|
operand_0_layout = [3,2,1,0],
|
||||||
|
operand_1_layout = [3,2,1,0],
|
||||||
|
result_layout = [3,2,1,0],
|
||||||
|
tensor_ops_enabled = true}}
|
||||||
: (memref<1x1x8x8xf16>, memref<1x1x2x2xf16>, memref<1x1x7x7xf16>, memref<32xi8>) -> ()
|
: (memref<1x1x8x8xf16>, memref<1x1x2x2xf16>, memref<1x1x7x7xf16>, memref<32xi8>) -> ()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -60,7 +63,11 @@ func @conv_forward(%input : memref<1x1x8x8xf16>, %filter: memref<1x1x2x2xf16>, %
|
||||||
func @conv_backfilter(%input : memref<3x56x56x16xf64>, %filter: memref<3x3x3x64xf64>, %output: memref<54x54x16x64xf64>) {
|
func @conv_backfilter(%input : memref<3x56x56x16xf64>, %filter: memref<3x3x3x64xf64>, %output: memref<54x54x16x64xf64>) {
|
||||||
%scratch = alloc() : memref<23328xui8>
|
%scratch = alloc() : memref<23328xui8>
|
||||||
"lmhlo_gpu.conv_backwardfilter"(%input, %filter, %output, %scratch)
|
"lmhlo_gpu.conv_backwardfilter"(%input, %filter, %output, %scratch)
|
||||||
{ backend_config = {algorithm = 1 : i64, tensor_ops_enabled = false},
|
{ backend_config = {algorithm = 1 : i64,
|
||||||
|
operand_0_layout = [3,2,1,0],
|
||||||
|
operand_1_layout = [3,2,1,0],
|
||||||
|
result_layout = [3,2,1,0],
|
||||||
|
tensor_ops_enabled = false},
|
||||||
batch_group_count = 1 : i64,
|
batch_group_count = 1 : i64,
|
||||||
dimension_numbers = {input_batch_dimension = 0 : i64,
|
dimension_numbers = {input_batch_dimension = 0 : i64,
|
||||||
input_feature_dimension = 3 : i64,
|
input_feature_dimension = 3 : i64,
|
||||||
|
@ -86,7 +93,11 @@ func @conv_backfilter(%input : memref<3x56x56x16xf64>, %filter: memref<3x3x3x64x
|
||||||
func @conv_backinput(%input : memref<4x5x16x16xf64>, %filter : memref<5x3x7x7xf64>, %output : memref<4x3x16x16xf64>) {
|
func @conv_backinput(%input : memref<4x5x16x16xf64>, %filter : memref<5x3x7x7xf64>, %output : memref<4x3x16x16xf64>) {
|
||||||
%scratch = alloc() : memref<32xui8>
|
%scratch = alloc() : memref<32xui8>
|
||||||
"lmhlo_gpu.conv_backwardinput"(%input, %filter, %output, %scratch)
|
"lmhlo_gpu.conv_backwardinput"(%input, %filter, %output, %scratch)
|
||||||
{ backend_config = {algorithm = 1 : i64, tensor_ops_enabled = false},
|
{ backend_config = {algorithm = 1 : i64,
|
||||||
|
operand_0_layout = [3,2,1,0],
|
||||||
|
operand_1_layout = [3,2,1,0],
|
||||||
|
result_layout = [3,2,1,0],
|
||||||
|
tensor_ops_enabled = false},
|
||||||
batch_group_count = 1 : i64,
|
batch_group_count = 1 : i64,
|
||||||
dimension_numbers = {input_batch_dimension = 0 : i64,
|
dimension_numbers = {input_batch_dimension = 0 : i64,
|
||||||
input_feature_dimension = 1 : i64,
|
input_feature_dimension = 1 : i64,
|
||||||
|
@ -114,7 +125,11 @@ func @conv_fused(%input : memref<1x17x9x9xf16>, %filter : memref<3x3x17x32xf16>,
|
||||||
%scratch = alloc() : memref<32xui8>
|
%scratch = alloc() : memref<32xui8>
|
||||||
"lmhlo_gpu.conv_forward_fused"(%input, %filter, %bias, %output, %scratch)
|
"lmhlo_gpu.conv_forward_fused"(%input, %filter, %bias, %output, %scratch)
|
||||||
{activation_mode = "Relu",
|
{activation_mode = "Relu",
|
||||||
backend_config = {algorithm = 0 : i64, tensor_ops_enabled = false},
|
backend_config = {algorithm = 1 : i64,
|
||||||
|
operand_0_layout = [3,2,1,0],
|
||||||
|
operand_1_layout = [3,2,1,0],
|
||||||
|
result_layout = [3,2,1,0],
|
||||||
|
tensor_ops_enabled = false},
|
||||||
batch_group_count = 1 : i64,
|
batch_group_count = 1 : i64,
|
||||||
dimension_numbers = {input_batch_dimension = 0 : i64,
|
dimension_numbers = {input_batch_dimension = 0 : i64,
|
||||||
input_feature_dimension = 1 : i64,
|
input_feature_dimension = 1 : i64,
|
||||||
|
@ -141,7 +156,11 @@ func @conv_fused_side_input(%input : memref<1x17x9x9xf16>, %filter : memref<3x3x
|
||||||
%scratch = alloc() : memref<0xui8>
|
%scratch = alloc() : memref<0xui8>
|
||||||
"lmhlo_gpu.conv_forward_fused_with_side_input"(%input, %filter, %bias, %side_input, %output, %scratch)
|
"lmhlo_gpu.conv_forward_fused_with_side_input"(%input, %filter, %bias, %side_input, %output, %scratch)
|
||||||
{activation_mode = "Relu",
|
{activation_mode = "Relu",
|
||||||
backend_config = {algorithm = 0 : i64, tensor_ops_enabled = false},
|
backend_config = {algorithm = 1 : i64,
|
||||||
|
operand_0_layout = [3,2,1,0],
|
||||||
|
operand_1_layout = [3,2,1,0],
|
||||||
|
result_layout = [3,2,1,0],
|
||||||
|
tensor_ops_enabled = false},
|
||||||
batch_group_count = 1 : i64,
|
batch_group_count = 1 : i64,
|
||||||
dimension_numbers = {input_batch_dimension = 0 : i64,
|
dimension_numbers = {input_batch_dimension = 0 : i64,
|
||||||
input_feature_dimension = 1 : i64,
|
input_feature_dimension = 1 : i64,
|
||||||
|
|
Loading…
Reference in New Issue