Add GPU specific LMHLO level ops
- Introduce operations in a new lmhlo_gpu dialect that map to GPU library function calls in the XLA:GPU backend. - Add basic unit tests as well. PiperOrigin-RevId: 337132166
This commit is contained in:
		
							parent
							
								
									8506f1f26a
								
							
						
					
					
						commit
						f6b4e6758a
					
				| 
						 | 
					@ -27,5 +27,6 @@ endfunction()
 | 
				
			||||||
add_mlir_hlo_dialect(chlo_ops chlo)
 | 
					add_mlir_hlo_dialect(chlo_ops chlo)
 | 
				
			||||||
add_mlir_hlo_dialect(hlo_ops mhlo)
 | 
					add_mlir_hlo_dialect(hlo_ops mhlo)
 | 
				
			||||||
add_mlir_hlo_dialect(lhlo_ops lmhlo)
 | 
					add_mlir_hlo_dialect(lhlo_ops lmhlo)
 | 
				
			||||||
 | 
					add_mlir_hlo_dialect(lhlo_gpu_ops lmhlo_gpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
add_mlir_interface(infer_fusibility_op_interface)
 | 
					add_mlir_interface(infer_fusibility_op_interface)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,55 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This file defines the operations used in the LHLO dialect.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_GPU_OPS_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_GPU_OPS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "llvm/ADT/StringRef.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Attributes.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Dialect.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Location.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/MLIRContext.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/OpDefinition.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Operation.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/StandardTypes.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Types.h"
 | 
				
			||||||
 | 
					#include "mlir/Interfaces/CopyOpInterface.h"
 | 
				
			||||||
 | 
					#include "mlir/Interfaces/SideEffectInterfaces.h"
 | 
				
			||||||
 | 
					#include "mlir/Interfaces/ViewLikeInterface.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace mlir {
 | 
				
			||||||
 | 
					class OpBuilder;
 | 
				
			||||||
 | 
					}  // namespace mlir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.h.inc"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace mlir {
 | 
				
			||||||
 | 
					namespace lmhlo_gpu {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class LmhloGpuDialect : public Dialect {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  explicit LmhloGpuDialect(MLIRContext *context);
 | 
				
			||||||
 | 
					  static StringRef getDialectNamespace() { return "lmhlo_gpu"; }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace lmhlo_gpu
 | 
				
			||||||
 | 
					}  // end namespace mlir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GET_OP_CLASSES
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h.inc"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_GPU_OPS_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,230 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This is the operation definition file for LHMLO level GPU operations.
 | 
				
			||||||
 | 
					// Because these are LMHLO level operations, they operate on memrefs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef LHLO_GPU_OPS
 | 
				
			||||||
 | 
					#define LHLO_GPU_OPS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					include "mlir/IR/OpBase.td"
 | 
				
			||||||
 | 
					include "mlir/Interfaces/SideEffectInterfaces.td"
 | 
				
			||||||
 | 
					include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops_base.td"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLO_GPU_Dialect : Dialect {
 | 
				
			||||||
 | 
					  let name = "lmhlo_gpu";
 | 
				
			||||||
 | 
					  let cppNamespace = "::mlir::lmhlo_gpu";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class LHLOGPU_Op<string mnemonic, list<OpTrait> traits = []> :
 | 
				
			||||||
 | 
					  Op<LHLO_GPU_Dialect, mnemonic,
 | 
				
			||||||
 | 
					    !listconcat([MemoryEffects<[MemRead, MemWrite]>], traits)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Type for scratch buffers used by GPU library calls (memref<?xi8>)
 | 
				
			||||||
 | 
					def UntypedBuffer : MemRefRankOf<[I8], [1]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Cholesky info output buffer type.
 | 
				
			||||||
 | 
					def I32Buffer : MemRefOf<[I32]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					// LMHLO ops representing batch norm library functions.
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Note: these are semantically different from similar LHLO as the GPU library
 | 
				
			||||||
 | 
					// calls generate or consume standard deviation, whereas LHLO ops generate or
 | 
				
			||||||
 | 
					// consume variance (= std-dev ^ 2).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_BatchNormGradOp : LHLOGPU_Op<"batch_norm_grad">,
 | 
				
			||||||
 | 
					    BASE_HLO_BatchNormGradOp {
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$mean,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$stddev,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$grad_output,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_operand,  // gradient of $operand.
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_scale,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_offset,
 | 
				
			||||||
 | 
					    F32Attr:$epsilon,
 | 
				
			||||||
 | 
					    I64Attr:$feature_index
 | 
				
			||||||
 | 
					  );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_BatchNormInferenceOp : LHLOGPU_Op<"batch_norm_inference">,
 | 
				
			||||||
 | 
					    BASE_HLO_BatchNormInferenceOp {
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$offset,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$mean,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$stddev,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
 | 
				
			||||||
 | 
					    F32Attr:$epsilon,
 | 
				
			||||||
 | 
					    I64Attr:$feature_index);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_BatchNormTrainingOp : LHLOGPU_Op<"batch_norm_training">,
 | 
				
			||||||
 | 
					    BASE_HLO_BatchNormTrainingOp {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$offset,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$batch_mean,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$batch_stddev,
 | 
				
			||||||
 | 
					    F32Attr:$epsilon,
 | 
				
			||||||
 | 
					    I64Attr:$feature_index
 | 
				
			||||||
 | 
					  );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					// LMHLO ops representing convolution library functions.
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def ActivationModeNone : StrEnumAttrCase<"None">;
 | 
				
			||||||
 | 
					def ActivationModeSigmoid : StrEnumAttrCase<"Sigmoid">;
 | 
				
			||||||
 | 
					def ActivationModeTanh : StrEnumAttrCase<"Relu">;
 | 
				
			||||||
 | 
					def ActivationModeRelu : StrEnumAttrCase<"Relu">;
 | 
				
			||||||
 | 
					def ActivationModeRelu6 : StrEnumAttrCase<"Relu6">;
 | 
				
			||||||
 | 
					def ActivationModeReluX : StrEnumAttrCase<"ReluX">;
 | 
				
			||||||
 | 
					def ActivationModeBandPass : StrEnumAttrCase<"BandPass">;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def ActivationAttr : StrEnumAttr<"Activation",
 | 
				
			||||||
 | 
					    "Activation applied with fused convolution",
 | 
				
			||||||
 | 
					    [ActivationModeNone,  ActivationModeSigmoid, ActivationModeTanh,
 | 
				
			||||||
 | 
					     ActivationModeRelu, ActivationModeRelu6, ActivationModeReluX,
 | 
				
			||||||
 | 
					     ActivationModeBandPass]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def ConvolutionBackendConfigAttr : StructAttr<"ConvolutionBackendConfig",
 | 
				
			||||||
 | 
					                                          LHLO_GPU_Dialect, [
 | 
				
			||||||
 | 
					   StructFieldAttr<"algorithm", I64Attr>,
 | 
				
			||||||
 | 
					   StructFieldAttr<"tensor_ops_enabled", BoolAttr>]> {
 | 
				
			||||||
 | 
					   let description = "GPU Convolution backend configuration";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def GpuConvolutionAttributes {
 | 
				
			||||||
 | 
					  dag attributes = !con(
 | 
				
			||||||
 | 
					    ConvolutionAttributes<LHLO_GPU_Dialect>.attributes,
 | 
				
			||||||
 | 
					    (ins F64Attr:$result_scale),
 | 
				
			||||||
 | 
					    (ins ConvolutionBackendConfigAttr:$backend_config));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def GpuFusedConvolutionAttributes {
 | 
				
			||||||
 | 
					  dag attributes = !con(
 | 
				
			||||||
 | 
					    ConvolutionAttributes<LHLO_GPU_Dialect>.attributes,
 | 
				
			||||||
 | 
					    (ins F64Attr:$result_scale,
 | 
				
			||||||
 | 
					         ActivationAttr:$activation_mode,
 | 
				
			||||||
 | 
					         F64Attr:$side_input_scale),
 | 
				
			||||||
 | 
					    (ins ConvolutionBackendConfigAttr:$backend_config));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_ConvForwardOp : LHLOGPU_Op<"conv_forward"> {
 | 
				
			||||||
 | 
					  let arguments = !con(
 | 
				
			||||||
 | 
					    (ins
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$input,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$filter,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemWrite]>:$output,
 | 
				
			||||||
 | 
					       Arg<UntypedBuffer, "", [MemWrite]>:$scratch),
 | 
				
			||||||
 | 
					     GpuConvolutionAttributes.attributes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_ConvBackwardInputOp : LHLOGPU_Op<"conv_backwardinput"> {
 | 
				
			||||||
 | 
					  let arguments = !con(
 | 
				
			||||||
 | 
					    (ins
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$d_output,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$filter,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemWrite]>:$d_input,
 | 
				
			||||||
 | 
					       Arg<UntypedBuffer, "", [MemWrite]>:$scratch),
 | 
				
			||||||
 | 
					     GpuConvolutionAttributes.attributes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_ConvBackwardFilterOp : LHLOGPU_Op<"conv_backwardfilter"> {
 | 
				
			||||||
 | 
					  let arguments = !con(
 | 
				
			||||||
 | 
					    (ins
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$input,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$d_output,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemWrite]>:$d_filter,
 | 
				
			||||||
 | 
					       Arg<UntypedBuffer, "", [MemWrite]>:$scratch),
 | 
				
			||||||
 | 
					     GpuConvolutionAttributes.attributes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// output = activation(result_scale * conv(input, filter) +
 | 
				
			||||||
 | 
					//                     side_input * side_input_scale +
 | 
				
			||||||
 | 
					//                     bias)
 | 
				
			||||||
 | 
					def LHLOGPU_ConvForwardFusedOp : LHLOGPU_Op<"conv_forward_fused"> {
 | 
				
			||||||
 | 
					  let arguments = !con(
 | 
				
			||||||
 | 
					    (ins
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$input,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$filter,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$bias,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemRead]>:$side_input,
 | 
				
			||||||
 | 
					       Arg<LHLO_Buffer, "", [MemWrite]>:$output,
 | 
				
			||||||
 | 
					       Arg<UntypedBuffer, "", [MemWrite]>:$scratch),
 | 
				
			||||||
 | 
					     GpuFusedConvolutionAttributes.attributes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					// LMHLO ops representing other library functions.
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(jurahul): Share this with the MHLO dialect.
 | 
				
			||||||
 | 
					def DotDimensionNumbersAttr : StructAttr<"DotDimensionNumbers", LHLO_GPU_Dialect, [
 | 
				
			||||||
 | 
					                StructFieldAttr<"lhs_batching_dimensions",   I64ElementsAttr>,
 | 
				
			||||||
 | 
					                StructFieldAttr<"rhs_batching_dimensions",   I64ElementsAttr>,
 | 
				
			||||||
 | 
					                StructFieldAttr<"lhs_contracting_dimensions", I64ElementsAttr>,
 | 
				
			||||||
 | 
					                StructFieldAttr<"rhs_contracting_dimensions", I64ElementsAttr>
 | 
				
			||||||
 | 
					  ]> {
 | 
				
			||||||
 | 
					  let description = "Structure of dimension information for dot product";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// output = alpha * (lhs * rhs)
 | 
				
			||||||
 | 
					// Verify: beta = 0.0
 | 
				
			||||||
 | 
					def LHLOGPU_GEMMOp : LHLOGPU_Op<"gemm"> {
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$lhs,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$rhs,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$output,
 | 
				
			||||||
 | 
					    DotDimensionNumbersAttr:$dot_dimension_numbers,
 | 
				
			||||||
 | 
					    F64Attr:$alpha,
 | 
				
			||||||
 | 
					    I64Attr:$batch_size,
 | 
				
			||||||
 | 
					    I64Attr:$algorithm);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// output = alpha(lhs * rhs) + beta * bias
 | 
				
			||||||
 | 
					def LHLOGPU_GEMM_BiasOp : LHLOGPU_Op<"gemm_bias"> {
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$lhs,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$rhs,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$bias,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$output,
 | 
				
			||||||
 | 
					    DotDimensionNumbersAttr:$dot_dimension_numbers,
 | 
				
			||||||
 | 
					    F64Attr:$alpha,
 | 
				
			||||||
 | 
					    F64Attr:$beta,
 | 
				
			||||||
 | 
					    I64Attr:$batch_size,
 | 
				
			||||||
 | 
					    I64Attr:$algorithm);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def LHLOGPU_CholeskyOp : LHLOGPU_Op<"cholesky"> {
 | 
				
			||||||
 | 
					  let arguments = (ins
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemRead]>:$input,
 | 
				
			||||||
 | 
					    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
 | 
				
			||||||
 | 
					    Arg<UntypedBuffer, "", [MemWrite]>:$scratch,
 | 
				
			||||||
 | 
					    Arg<I32Buffer, "", [MemWrite]>:$info,
 | 
				
			||||||
 | 
					    BoolAttr:$is_upper);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif // LHLO_GPU_OPS
 | 
				
			||||||
| 
						 | 
					@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 | 
				
			||||||
limitations under the License.
 | 
					limitations under the License.
 | 
				
			||||||
==============================================================================*/
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// This file defines the operations used in the LXLA dialect.
 | 
					// This file defines the operations used in the LHLO dialect.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_OPS_H_
 | 
					#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_OPS_H_
 | 
				
			||||||
#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_OPS_H_
 | 
					#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_OPS_H_
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -66,6 +66,14 @@ add_mlir_dialect_library(LmhloDialect
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
target_link_libraries(LmhloDialect PUBLIC MLIRIR)
 | 
					target_link_libraries(LmhloDialect PUBLIC MLIRIR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_mlir_dialect_library(LmhloGPUDialect
 | 
				
			||||||
 | 
					  lhlo_gpu_ops.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DEPENDS
 | 
				
			||||||
 | 
					  MLIRlhlo_gpu_opsIncGen
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					target_link_libraries(LmhloGPUDialect PUBLIC MLIRIR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
add_mlir_dialect_library(MhloRegisterDialects
 | 
					add_mlir_dialect_library(MhloRegisterDialects
 | 
				
			||||||
  init.cc
 | 
					  init.cc
 | 
				
			||||||
| 
						 | 
					@ -73,10 +81,12 @@ DEPENDS
 | 
				
			||||||
  MLIRchlo_opsIncGen
 | 
					  MLIRchlo_opsIncGen
 | 
				
			||||||
  MLIRhlo_opsIncGen
 | 
					  MLIRhlo_opsIncGen
 | 
				
			||||||
  MLIRlhlo_opsIncGen
 | 
					  MLIRlhlo_opsIncGen
 | 
				
			||||||
 | 
					  MLIRlhlo_gpu_opsIncGen
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
target_link_libraries(MhloRegisterDialects
 | 
					target_link_libraries(MhloRegisterDialects
 | 
				
			||||||
  PUBLIC
 | 
					  PUBLIC
 | 
				
			||||||
  ChloDialect
 | 
					  ChloDialect
 | 
				
			||||||
  MhloDialect
 | 
					  MhloDialect
 | 
				
			||||||
  LmhloDialect
 | 
					  LmhloDialect
 | 
				
			||||||
 | 
					  LmhloGPUDialect
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,13 +15,15 @@ limitations under the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/register.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/register.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void mlir::mhlo::registerAllMhloDialects(mlir::DialectRegistry ®istry) {
 | 
					void mlir::mhlo::registerAllMhloDialects(mlir::DialectRegistry ®istry) {
 | 
				
			||||||
  // clang-format off
 | 
					  // clang-format off
 | 
				
			||||||
  registry.insert<mlir::chlo::HloClientDialect,
 | 
					  registry.insert<mlir::chlo::HloClientDialect,
 | 
				
			||||||
 | 
					                  mlir::mhlo::MhloDialect,
 | 
				
			||||||
                  mlir::lmhlo::LmhloDialect,
 | 
					                  mlir::lmhlo::LmhloDialect,
 | 
				
			||||||
                  mlir::mhlo::MhloDialect>();
 | 
					                  mlir::lmhlo_gpu::LmhloGpuDialect>();
 | 
				
			||||||
  // clang-format on
 | 
					  // clang-format on
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,66 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This file defines the operations used in the LMHLO GPU dialect.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <assert.h>
 | 
				
			||||||
 | 
					#include <stddef.h>
 | 
				
			||||||
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "llvm/ADT/APFloat.h"
 | 
				
			||||||
 | 
					#include "llvm/ADT/APInt.h"
 | 
				
			||||||
 | 
					#include "llvm/ADT/ArrayRef.h"
 | 
				
			||||||
 | 
					#include "llvm/ADT/STLExtras.h"
 | 
				
			||||||
 | 
					#include "llvm/ADT/SmallVector.h"
 | 
				
			||||||
 | 
					#include "llvm/ADT/StringRef.h"
 | 
				
			||||||
 | 
					#include "llvm/Support/FormatVariadic.h"
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h.inc"
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.cc.inc"
 | 
				
			||||||
 | 
					#include "mlir/Dialect/StandardOps/IR/Ops.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Attributes.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Builders.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Dialect.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Location.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/MLIRContext.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/OpDefinition.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/OpImplementation.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Operation.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/OperationSupport.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/PatternMatch.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/StandardTypes.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/TypeUtilities.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Types.h"
 | 
				
			||||||
 | 
					#include "mlir/IR/Value.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace mlir {
 | 
				
			||||||
 | 
					namespace lmhlo_gpu {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LmhloGpuDialect::LmhloGpuDialect(MLIRContext *context)
 | 
				
			||||||
 | 
					    : Dialect(getDialectNamespace(), context, TypeID::get<LmhloGpuDialect>()) {
 | 
				
			||||||
 | 
					  addOperations<
 | 
				
			||||||
 | 
					#define GET_OP_LIST
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.cc.inc"
 | 
				
			||||||
 | 
					      >();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(jurahul): Add verification for operand shapes and ranks.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace lmhlo_gpu
 | 
				
			||||||
 | 
					}  // namespace mlir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GET_OP_CLASSES
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.cc.inc"
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,99 @@
 | 
				
			||||||
 | 
					// RUN: mlir-hlo-opt %s -verify-diagnostics -split-input-file | mlir-hlo-opt | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @batch_norm_grad_memrefs
 | 
				
			||||||
 | 
					func @batch_norm_grad_memrefs(%arg0: memref<8x8x8x8xf32>, %arg1: memref<8xf32>, %arg2: memref<8xf32>,
 | 
				
			||||||
 | 
					                              %arg3: memref<8xf32>, %arg4: memref<8x8x8x8xf32>,
 | 
				
			||||||
 | 
					                              %grad_operand: memref<8x8x8x8xf32>, %grad_scale: memref<8xf32>,
 | 
				
			||||||
 | 
					                              %grad_offset: memref<8xf32>) -> () {
 | 
				
			||||||
 | 
					  "lmhlo_gpu.batch_norm_grad"(%arg0, %arg1, %arg2, %arg3, %arg4, %grad_operand, %grad_scale, %grad_offset) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64}
 | 
				
			||||||
 | 
					      : (memref<8x8x8x8xf32>, memref<8xf32>, memref<8xf32>, memref<8xf32>, memref<8x8x8x8xf32>,
 | 
				
			||||||
 | 
					         memref<8x8x8x8xf32>, memref<8xf32>, memref<8xf32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @batch_norm_inference_memrefs
 | 
				
			||||||
 | 
					func @batch_norm_inference_memrefs(%arg0: memref<8x8x8x8xf32>, %arg1: memref<8xf32>, %arg2: memref<8xf32>,
 | 
				
			||||||
 | 
					                                   %arg3: memref<8xf32>, %arg4: memref<8xf32>, %arg_out: memref<8x8x8x8xf32>) -> () {
 | 
				
			||||||
 | 
					  "lmhlo_gpu.batch_norm_inference"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg_out) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64}
 | 
				
			||||||
 | 
					      : (memref<8x8x8x8xf32>, memref<8xf32>, memref<8xf32>, memref<8xf32>, memref<8xf32>, memref<8x8x8x8xf32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @batch_norm_training_memrefs
 | 
				
			||||||
 | 
					func @batch_norm_training_memrefs(%arg0: memref<8x8x8x8xf32>, %arg1: memref<8xf32>, %arg2: memref<8xf32>,
 | 
				
			||||||
 | 
					                                  %output: memref<8x8x8x8xf32>, %batch_mean: memref<8xf32>,
 | 
				
			||||||
 | 
					                                  %batch_var: memref<8xf32>) -> () {
 | 
				
			||||||
 | 
					  "lmhlo_gpu.batch_norm_training"(%arg0, %arg1, %arg2, %output, %batch_mean, %batch_var) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64}
 | 
				
			||||||
 | 
					      : (memref<8x8x8x8xf32>, memref<8xf32>, memref<8xf32>, memref<8x8x8x8xf32>, memref<8xf32>, memref<8xf32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @conv_forward
 | 
				
			||||||
 | 
					func @conv_forward(%input : memref<1x1x8x8xf16>, %filter: memref<1x1x2x2xf16>, %output: memref<1x1x7x7xf16>) {
 | 
				
			||||||
 | 
					  %scratch = alloc() : memref<32xi8>
 | 
				
			||||||
 | 
					  // This defined a 2D convolution over a 8x8 single channel input using a 2x2
 | 
				
			||||||
 | 
					  // filter and with an output of 7x7xf16. The 1x1x8x8 is (N, C, H, W)
 | 
				
			||||||
 | 
					  "lmhlo_gpu.conv_forward"(%input, %filter, %output, %scratch)
 | 
				
			||||||
 | 
					    { dimension_numbers = {input_batch_dimension = 0 : i64,
 | 
				
			||||||
 | 
					                           input_feature_dimension = 1 : i64,
 | 
				
			||||||
 | 
					                           input_spatial_dimensions = dense<[2,3]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					                           kernel_input_feature_dimension = 0 : i64,
 | 
				
			||||||
 | 
					                           kernel_output_feature_dimension = 1 : i64,
 | 
				
			||||||
 | 
					                           kernel_spatial_dimensions = dense<[2,3]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					                           output_batch_dimension = 0 : i64,
 | 
				
			||||||
 | 
					                           output_feature_dimension = 1 : i64,
 | 
				
			||||||
 | 
					                           output_spatial_dimensions = dense<[2,3]> : tensor<2xi64>},
 | 
				
			||||||
 | 
					      window_strides = dense<[1, 1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					      padding = dense<[0,0]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					      lhs_dilation = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					      rhs_dilation = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					      feature_group_count = 1,
 | 
				
			||||||
 | 
					      batch_group_count = 1,
 | 
				
			||||||
 | 
					      result_scale = 1.0,
 | 
				
			||||||
 | 
					      backend_config = {algorithm=0, tensor_ops_enabled = true }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    : (memref<1x1x8x8xf16>, memref<1x1x2x2xf16>, memref<1x1x7x7xf16>, memref<32xi8>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// -----
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @gemm
 | 
				
			||||||
 | 
					func @gemm(%lhs: memref<5x4xf32>, %rhs: memref<4x5xf32>, %output:memref<5x5xf32>) {
 | 
				
			||||||
 | 
					  "lmhlo_gpu.gemm"(%lhs, %rhs, %output) { dot_dimension_numbers = {
 | 
				
			||||||
 | 
					       lhs_batching_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       rhs_batching_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       lhs_contracting_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       rhs_contracting_dimensions = dense<[1,1]> : tensor<2xi64>},
 | 
				
			||||||
 | 
					       alpha = 0.5,
 | 
				
			||||||
 | 
					       batch_size = 1,
 | 
				
			||||||
 | 
					       algorithm = 0}
 | 
				
			||||||
 | 
					    : (memref<5x4xf32>, memref<4x5xf32>, memref<5x5xf32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @gemm_bias
 | 
				
			||||||
 | 
					func @gemm_bias(%lhs: memref<5x4xf32>, %rhs: memref<4x5xf32>,
 | 
				
			||||||
 | 
					                %bias: memref<5x5xf32>, %output:memref<5x5xf32>) {
 | 
				
			||||||
 | 
					  "lmhlo_gpu.gemm_bias"(%lhs, %rhs, %bias, %output) { dot_dimension_numbers = {
 | 
				
			||||||
 | 
					       lhs_batching_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       rhs_batching_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       lhs_contracting_dimensions = dense<[1,1]> : tensor<2xi64>,
 | 
				
			||||||
 | 
					       rhs_contracting_dimensions = dense<[1,1]> : tensor<2xi64>},
 | 
				
			||||||
 | 
					       alpha = 0.5,
 | 
				
			||||||
 | 
					       beta = 1.0,
 | 
				
			||||||
 | 
					       batch_size = 1,
 | 
				
			||||||
 | 
					       algorithm = 0}
 | 
				
			||||||
 | 
					    : (memref<5x4xf32>, memref<4x5xf32>, memref<5x5xf32>, memref<5x5xf32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CHECK-LABEL: func @cholesky
 | 
				
			||||||
 | 
					func @cholesky(%arg : memref<10x10xf32>, %out: memref<10x10xf32>) {
 | 
				
			||||||
 | 
					  %scratch = alloc() : memref<32xi8>
 | 
				
			||||||
 | 
					  %info = alloc() : memref<32xi32>
 | 
				
			||||||
 | 
					  "lmhlo_gpu.cholesky"(%arg, %out, %scratch, %info) { is_upper = true }
 | 
				
			||||||
 | 
					      : (memref<10x10xf32>, memref<10x10xf32>, memref<32xi8>, memref<32xi32>) -> ()
 | 
				
			||||||
 | 
					  return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,7 @@ limitations under the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 | 
				
			||||||
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 | 
				
			||||||
#include "mlir-hlo/Dialect/mhlo/transforms/register_passes.h"
 | 
					#include "mlir-hlo/Dialect/mhlo/transforms/register_passes.h"
 | 
				
			||||||
#include "mlir/InitAllDialects.h"
 | 
					#include "mlir/InitAllDialects.h"
 | 
				
			||||||
| 
						 | 
					@ -31,6 +32,7 @@ int main(int argc, char **argv) {
 | 
				
			||||||
  registry.insert<mlir::mhlo::MhloDialect>();
 | 
					  registry.insert<mlir::mhlo::MhloDialect>();
 | 
				
			||||||
  registry.insert<mlir::chlo::HloClientDialect>();
 | 
					  registry.insert<mlir::chlo::HloClientDialect>();
 | 
				
			||||||
  registry.insert<mlir::lmhlo::LmhloDialect>();
 | 
					  registry.insert<mlir::lmhlo::LmhloDialect>();
 | 
				
			||||||
 | 
					  registry.insert<mlir::lmhlo_gpu::LmhloGpuDialect>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return failed(
 | 
					  return failed(
 | 
				
			||||||
      mlir::MlirOptMain(argc, argv, "MLIR HLO pass driver\n", registry));
 | 
					      mlir::MlirOptMain(argc, argv, "MLIR HLO pass driver\n", registry));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue