mlir-hlo/BUILD

1473 lines
42 KiB
Python
Raw Normal View History

load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library", "gentbl_filegroup", "td_library")
package(
default_visibility = ["//visibility:public"],
licenses = ["notice"],
)
exports_files([
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td",
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td",
])
td_library(
name = "hlo_ops_td_files",
srcs = glob(["include/mlir-hlo/Dialect/mhlo/IR/*.td"]) + [
# TODO(gcmn): These should be encapsulate in a td_library.
"@llvm-project//mlir:include/mlir/Interfaces/ControlFlowInterfaces.td",
"@llvm-project//mlir:include/mlir/Interfaces/CopyOpInterface.td",
"@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td",
"@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td",
"@llvm-project//mlir:include/mlir/Interfaces/ViewLikeInterface.td",
"@llvm-project//mlir:include/mlir/Dialect/Shape/IR/ShapeBase.td",
"@llvm-project//mlir:include/mlir/Dialect/Shape/IR/ShapeOps.td",
],
includes = ["include"],
deps = [
"@llvm-project//mlir:ControlFlowInterfacesTdFiles",
"@llvm-project//mlir:LoopLikeInterfaceTdFiles",
"@llvm-project//mlir:MemRefOpsTdFiles",
"@llvm-project//mlir:OpBaseTdFiles",
"@llvm-project//mlir:SideEffectTdFiles",
],
)
gentbl_cc_library(
name = "MhloPassIncGen",
strip_include_prefix = "include",
tbl_outs = [
(
[
"-gen-pass-decls",
"-name=MHLO",
],
"include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.h.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
"@llvm-project//mlir:PassBaseTdFiles",
],
)
gentbl_cc_library(
name = "LmhloPassIncGen",
strip_include_prefix = "include",
tbl_outs = [
(
[
"-gen-pass-decls",
"-name=LMHLO",
],
"include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.h.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
"@llvm-project//mlir:PassBaseTdFiles",
],
)
gentbl_cc_library(
name = "chlo_ops_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "hlo_ops_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
2021-07-23 11:38:34 +08:00
cc_binary(
name = "mlir-tblgen-builder",
srcs = glob([
"tools/mlir-tblgen-builder/*.h",
"tools/mlir-tblgen-builder/*.cpp",
"tools/mlir-tblgen-builder/TableGen/*.h",
"tools/mlir-tblgen-builder/TableGen/*.cpp",
]),
deps = [
"@llvm-project//mlir:MlirTableGenMain",
"@llvm-project//mlir:Support",
# "@llvm-project//mlir:TableGen",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TableGen",
"@llvm-project//llvm:config",
],
)
gentbl_cc_library(
name = "hlo_ops_builder_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-builder-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_builder.h.inc",
),
(
["-gen-builder-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_builder.cc.inc",
),
],
tblgen = ":mlir-tblgen-builder",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "hlo_ops_base_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "hlo_ops_base_structs_inc_gen",
tbl_outs = [
(
["-gen-struct-attr-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h.inc",
),
(
["-gen-struct-attr-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "hlo_ops_base_enums_inc_gen",
tbl_outs = [
(
["-gen-enum-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.h.inc",
),
(
["-gen-enum-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "hlo_ops_pattern_gen",
strip_include_prefix = "lib/Dialect/mhlo/IR/",
tbl_outs = [
(
["-gen-rewriters"],
"lib/Dialect/mhlo/IR/hlo_patterns.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "lib/Dialect/mhlo/IR/hlo_patterns.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
":hlo_ops_td_files",
"@llvm-project//mlir:StdOpsTdFiles",
"@llvm-project//mlir:TensorOpsTdFiles",
],
)
gentbl_cc_library(
name = "lhlo_ops_structs_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-struct-attr-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.h.inc",
),
(
["-gen-struct-attr-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "lhlo_ops_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "lhlo_gpu_ops_structs_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-struct-attr-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.h.inc",
),
(
["-gen-struct-attr-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "lhlo_gpu_ops_enums_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-enum-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.h.inc",
),
(
["-gen-enum-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_filegroup(
name = "hlo_ops_doc_gen",
tbl_outs = [
(
["-gen-dialect-doc"],
"g3doc/hlo_ops.md",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td",
deps = [":hlo_ops_td_files"],
)
gentbl_filegroup(
name = "lhlo_ops_doc_gen",
tbl_outs = [
(
["-gen-dialect-doc"],
"g3doc/lhlo_ops.md",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td",
deps = [":hlo_ops_td_files"],
)
cc_library(
name = "hlo_ops_common",
srcs = ["lib/Dialect/mhlo/IR/hlo_ops_common.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_common.h"],
includes = ["include"],
deps = [
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Support",
],
)
cc_library(
name = "lhlo_gpu_ops_structs",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.cc.inc",
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.h.inc",
"lib/Dialect/mhlo/IR/lhlo_gpu_ops_structs.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_structs.h",
],
includes = ["include"],
deps = [
":lhlo_gpu_ops_structs_inc_gen",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Support",
],
)
cc_library(
name = "lhlo_gpu_ops_enums",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.cc.inc",
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.h.inc",
"lib/Dialect/mhlo/IR/lhlo_gpu_ops_enums.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_enums.h",
],
includes = ["include"],
deps = [
":lhlo_gpu_ops_enums_inc_gen",
"@llvm-project//llvm:Support",
],
)
gentbl_cc_library(
name = "lhlo_gpu_ops_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
#TODO(aminim): revisit the naming and grouping of these rules post-move.
gentbl_cc_library(
name = "canonicalize_inc_gen",
strip_include_prefix = "lib/Dialect/mhlo/IR/",
tbl_outs = [
(
["-gen-rewriters"],
"lib/Dialect/mhlo/IR/mhlo_canonicalize.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "lib/Dialect/mhlo/IR/mhlo_canonicalize.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
gentbl_cc_library(
name = "infer_fusibility_op_interface_gen",
tbl_outs = [
(
["-gen-op-interface-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc",
),
(
["-gen-op-interface-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cpp.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
cc_library(
name = "infer_fusibility_op_interface",
srcs = [
"lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h",
"include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc",
],
includes = ["include"],
deps = [
":infer_fusibility_op_interface_gen",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Support",
],
alwayslink = 1,
)
cc_library(
name = "hlo_ops_base_structs",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h.inc",
"lib/Dialect/mhlo/IR/hlo_ops_base_structs.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h",
],
includes = ["include"],
deps = [
":hlo_ops_base_structs_inc_gen",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Support",
],
)
cc_library(
name = "hlo_ops_base_enums",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.h.inc",
"lib/Dialect/mhlo/IR/hlo_ops_base_enums.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.h",
],
includes = ["include"],
deps = [
":hlo_ops_base_enums_inc_gen",
"@llvm-project//llvm:Support",
],
)
cc_library(
name = "convert_op_folder",
srcs = ["lib/utils/convert_op_folder.cc"],
hdrs = ["include/mlir-hlo/utils/convert_op_folder.h"],
includes = ["include"],
deps = [
"@llvm-project//mlir:IR",
],
)
cc_library(
name = "hlo",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc",
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc",
"lib/Dialect/mhlo/IR/chlo_ops.cc",
"lib/Dialect/mhlo/IR/hlo_ops.cc",
"lib/utils/broadcast_utils.cc",
"lib/utils/hlo_utils.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h",
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h",
"include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.h",
"include/mlir-hlo/utils/broadcast_utils.h",
"include/mlir-hlo/utils/hlo_utils.h",
],
includes = ["include"],
deps = [
":canonicalize_inc_gen",
":chlo_ops_inc_gen",
":convert_op_folder",
":hlo_ops_base_enums",
":hlo_ops_base_inc_gen",
":hlo_ops_base_structs",
":hlo_ops_common",
":hlo_ops_inc_gen",
2021-07-23 11:38:34 +08:00
":hlo_ops_builder_gen",
":hlo_ops_pattern_gen",
":infer_fusibility_op_interface",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:ControlFlowInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:SideEffects",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "lhlo",
srcs = [
"lib/Dialect/mhlo/IR/lhlo_ops.cc",
"lib/Dialect/mhlo/IR/lhlo_ops_structs.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h",
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.h",
"include/mlir-hlo/utils/lhlo_utils.h",
],
includes = ["include"],
deps = [
":hlo_ops_base_enums",
":hlo_ops_base_inc_gen",
":hlo_ops_base_structs",
":hlo_ops_common",
":lhlo_ops_inc_gen",
":lhlo_ops_structs_inc_gen",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:ControlFlowInterfaces",
"@llvm-project//mlir:CopyOpInterface",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LoopLikeInterface",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SideEffects",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:Transforms",
"@llvm-project//mlir:ViewLikeInterface",
],
alwayslink = 1,
)
cc_library(
name = "lhlo_gpu",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.cc.inc",
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h.inc",
"lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h",
],
includes = ["include"],
deps = [
":hlo",
":hlo_ops_base_enums",
":hlo_ops_base_structs",
":hlo_ops_common",
":infer_fusibility_op_interface",
":lhlo",
":lhlo_gpu_ops_enums",
":lhlo_gpu_ops_inc_gen",
":lhlo_gpu_ops_structs",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:ControlFlowInterfaces",
"@llvm-project//mlir:CopyOpInterface",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:LoopLikeInterface",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SideEffects",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:Transforms",
"@llvm-project//mlir:ViewLikeInterface",
],
alwayslink = 1,
)
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
gentbl_cc_library(
name = "DiscRalPassIncGen",
strip_include_prefix = "include",
tbl_outs = [
(
[
"-gen-pass-decls",
"-name=RAL",
],
"include/mlir-hlo/Dialect/mhlo/transforms/disc_ral_passes.h.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/transforms/disc_ral_passes.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
"@llvm-project//mlir:PassBaseTdFiles",
],
)
gentbl_cc_library(
name = "disc_ral_ops_inc_gen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.h.inc",
),
(
["-gen-op-defs"],
"include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.cc.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
cc_library(
name = "disc_ral",
srcs = [
"include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.cc.inc",
"include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.h.inc",
"lib/Dialect/mhlo/IR/disc_ral_ops.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/IR/disc_ral_ops.h",
],
includes = ["include"],
deps = [
":disc_ral_ops_inc_gen",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:ControlFlowInterfaces",
"@llvm-project//mlir:CopyOpInterface",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:LoopLikeInterface",
"@llvm-project//mlir:MemRefDialect",
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SideEffects",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:Transforms",
"@llvm-project//mlir:ViewLikeInterface",
],
alwayslink = 1,
)
cc_library(
name = "ral_inject_execution_context",
srcs = ["lib/Dialect/mhlo/transforms/ral_inject_execution_context.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":disc_ral",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "hlo_dialect_registration",
srcs = ["lib/Dialect/mhlo/IR/init.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/IR/register.h"],
deps = [
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
":disc_ral",
":hlo",
":lhlo",
":lhlo_gpu",
"@llvm-project//mlir:IR",
],
)
cc_library(
name = "sink_constants_to_control_flow",
srcs = [
"lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc",
],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":hlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "mhlo_control_flow_to_scf",
srcs = ["lib/Dialect/mhlo/transforms/mhlo_control_flow_to_scf.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":hlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
],
)
cc_library(
name = "map_lmhlo_to_scalar_op",
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h"],
deps = [
":hlo",
":lhlo",
":map_hlo_to_lhlo_op",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:ComplexDialect",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MathDialect",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
],
)
cc_library(
name = "map_chlo_to_hlo_op",
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/map_chlo_to_hlo_op.h"],
deps = [
":hlo",
"@llvm-project//mlir:IR",
],
)
cc_library(
name = "map_hlo_to_lhlo_op",
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h"],
deps = [
":hlo",
":lhlo",
],
)
cc_library(
name = "lhlo_legalize_to_affine",
srcs = ["lib/Dialect/mhlo/transforms/lhlo_legalize_to_affine.cc"],
deps = [
":hlo",
":lhlo",
":map_lmhlo_to_scalar_op",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Affine",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TransformUtils",
],
alwayslink = 1,
)
cc_library(
name = "lhlo_legalize_to_parallel_loops",
srcs = ["lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc"],
deps = [
":lhlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgOps",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "legalize_to_linalg",
srcs = ["lib/Dialect/mhlo/transforms/legalize_to_linalg.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":lhlo",
":map_lmhlo_to_scalar_op",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Affine",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgOps",
"@llvm-project//mlir:MathDialect",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "broadcast_propagation",
srcs = ["lib/Dialect/mhlo/transforms/broadcast_propagation.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":map_chlo_to_hlo_op",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "rank_specialization",
srcs = ["lib/Dialect/mhlo/transforms/rank_specialization.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "lhlo_legalize_to_gpu",
srcs = ["lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc"],
deps = [
":hlo",
":lhlo",
":map_lmhlo_to_scalar_op",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Affine",
"@llvm-project//mlir:GPUDialect",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgOps",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "lhlo_fuse_linalg",
srcs = ["lib/Dialect/mhlo/transforms/lhlo_fuse_linalg.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":lhlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Affine",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgTransforms",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:ViewLikeInterface",
],
alwayslink = 1,
)
cc_library(
name = "hlo_legalize_to_lhlo",
srcs = ["lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":lhlo",
":map_hlo_to_lhlo_op",
PR #49970: [MLIR][DISC] bufferize DynamicReshape and DynamicBroadcastInDim Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/49970 1, add hlo-to-lhlo support for DynamicReshape and DynamicBroadcastInDim 2, add a flag `convert-to-lmhlo-only` to seperate following two case: - hlo-to-lhlo only. Simply lowers all mhlo ops to their lmhlo counterparts, do not apply any optimization (e.g. elide any buffer copy). Buffer optimization is not easy in dynamic shape world especially when involving control flow, thus we leave this to another dedicated pass. - hlo-to-lhlo-or-memref-directly. Lowers some metadata-only mhlo ops (e.g. reshape) to memref dialect directly and Lowers others to their lmhlo counterparts. Copybara import of the project: -- 562bd65a368f6194405c4ae6900e3b4388a5ec03 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] bufferize DynamicReshape and DynamicBroadcastInDim 1, add hlo-to-lhlo support for DynamicReshape and DynamicBroadcastInDim 2, add a flag `convert-to-lmhlo-only` to seperate following two case: - hlo-to-lhlo only. Simply lowers all mhlo ops to their lmhlo counterparts, do not apply any optimization (e.g. elide any buffer copy). Buffer optimization is not easy in dynamic shape world especially when involving control flow, thus we leave this to another dedicated pass. - hlo-to-lhlo-or-memref-directly. Lowers some metadata-only mhlo ops (e.g. reshape) to memref dialect directly and Lowers others to their lmhlo counterparts. PiperOrigin-RevId: 377603395
2021-06-05 06:35:08 +08:00
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:ShapeTransforms",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:StandardOpsTransforms",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "cycle_detector",
srcs = ["lib/utils/cycle_detector.cc"],
hdrs = ["include/mlir-hlo/utils/cycle_detector.h"],
includes = ["include"],
deps = [
"@llvm-project//llvm:Support",
],
alwayslink = 1,
)
cc_library(
name = "mhlo_fusion",
srcs = ["lib/Dialect/mhlo/transforms/mhlo_fusion.cc"],
deps = [
":cycle_detector",
":hlo",
":pass_details",
"@llvm-project//llvm:Core",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
],
alwayslink = 1,
)
gentbl_cc_library(
name = "legalize_to_standard_inc_gen",
strip_include_prefix = "lib/Dialect/mhlo/transforms/",
tbl_outs = [
(
["-gen-rewriters"],
"lib/Dialect/mhlo/transforms/generated_legalize_to_standard.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
":hlo_ops_td_files",
"@llvm-project//mlir:StdOpsTdFiles",
],
)
cc_library(
name = "legalize_control_flow",
srcs = ["lib/Dialect/mhlo/transforms/legalize_control_flow.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":hlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
],
alwayslink = 1,
)
cc_library(
name = "legalize_to_standard",
srcs = ["lib/Dialect/mhlo/transforms/legalize_to_standard.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":hlo",
":legalize_to_standard_inc_gen",
":legalize_trigonometric_to_approximation",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
],
alwayslink = 1,
)
cc_library(
name = "legalize_gather_to_torch_index_select",
srcs = ["lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "legalize_trigonometric_to_approximation",
srcs = ["lib/Dialect/mhlo/transforms/legalize_trigonometric_to_approximation.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
includes = ["include"],
deps = [
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MathDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
gentbl_cc_library(
name = "lower_complex_inc_gen",
strip_include_prefix = "lib/Dialect/mhlo/transforms/",
tbl_outs = [
(
["-gen-rewriters"],
"lib/Dialect/mhlo/transforms/generated_lower_complex.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "lib/Dialect/mhlo/transforms/lower_complex_patterns.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [
":hlo_ops_td_files",
"@llvm-project//mlir:StdOpsTdFiles",
],
)
cc_library(
#TODO(aminim): find a better name here?
name = "mhlo_to_mhlo_lowering_patterns",
srcs = [
"lib/Dialect/mhlo/transforms/lower_complex.cc",
"lib/Dialect/mhlo/transforms/lower_general_dot.cc",
"lib/Dialect/mhlo/transforms/optimize_mhlo.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
":lower_complex_inc_gen",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "materialize_broadcasts",
srcs = [
"lib/Dialect/mhlo/transforms/materialize_broadcasts.cc",
],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
],
deps = [
":hlo",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Transforms",
],
)
cc_library(
name = "unfuse_batch_norm",
srcs = ["lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc"],
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/passes.h",
],
deps = [
":hlo",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
)
cc_library(
name = "legalize_tensor_load_op",
srcs = ["lib/Dialect/mhlo/transforms/legalize_tensor_load_op.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"],
deps = [
":lhlo",
":pass_details",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
)
cc_library(
name = "fusion_utils",
srcs = ["lib/Dialect/mhlo/transforms/fusion_utils.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/fusion_utils.h"],
deps = [
":lhlo",
"@llvm-project//llvm:Core",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
],
alwayslink = 1,
)
cc_library(
name = "lhlo_fusion",
srcs = ["lib/Dialect/mhlo/transforms/lhlo_fusion.cc"],
deps = [
":cycle_detector",
":fusion_utils",
":lhlo",
":pass_details",
"@llvm-project//llvm:Core",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
],
alwayslink = 1,
)
cc_library(
name = "chlo_legalize_to_hlo",
srcs = ["lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc"],
hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"],
deps = [
":chlo_legalize_to_hlo_inc_gen",
":hlo",
":map_chlo_to_hlo_op",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
)
gentbl_cc_library(
name = "chlo_legalize_to_hlo_inc_gen",
strip_include_prefix = "lib/Dialect/mhlo/transforms/",
tbl_outs = [
(
["-gen-rewriters"],
"lib/Dialect/mhlo/transforms/generated_chlo_legalize_to_hlo.inc",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_patterns.td",
td_includes = [
"external/mlir-hlo/include",
"include",
],
deps = [":hlo_ops_td_files"],
)
cc_library(
name = "pass_details",
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/PassDetail.h",
],
visibility = [
"//visibility:private", # This target is a private detail of pass implementations
],
deps = [
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
":DiscRalPassIncGen",
":LmhloPassIncGen",
":MhloPassIncGen",
"@llvm-project//mlir:Pass",
],
)
cc_library(
name = "test_passes",
srcs = [
"include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h",
"lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc",
"lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc",
"lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc",
"lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc",
"lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc",
],
deps = [
":chlo_legalize_to_hlo", # build-cleaner: keep
":hlo",
":lhlo",
":materialize_broadcasts", # build-cleaner: keep
":pass_details",
":unfuse_batch_norm", # build-cleaner: keep
"@llvm-project//mlir:IR",
"@llvm-project//mlir:InferTypeOpInterface",
"@llvm-project//mlir:LLVMDialect",
"@llvm-project//mlir:LLVMTransforms",
"@llvm-project//mlir:MemRefDialect",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:SCFDialect",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:Transforms",
],
alwayslink = 1,
)
cc_library(
name = "all_passes",
hdrs = [
"include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h",
],
deps = [
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
":DiscRalPassIncGen",
":LmhloPassIncGen",
":MhloPassIncGen",
":broadcast_propagation",
":chlo_legalize_to_hlo",
":hlo_legalize_to_lhlo",
":legalize_control_flow",
":legalize_gather_to_torch_index_select",
":legalize_tensor_load_op",
":legalize_to_linalg",
":legalize_to_standard",
":legalize_trigonometric_to_approximation",
":lhlo",
":lhlo_fuse_linalg",
":lhlo_fusion",
":lhlo_legalize_to_affine",
":lhlo_legalize_to_gpu",
":lhlo_legalize_to_parallel_loops",
":mhlo_control_flow_to_scf",
":mhlo_fusion",
":mhlo_to_mhlo_lowering_patterns",
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
":ral_inject_execution_context",
":rank_specialization",
":sink_constants_to_control_flow",
":test_passes",
"@llvm-project//mlir:Pass",
],
)
cc_binary(
name = "mlir-hlo-opt",
srcs = [
"tools/mlir-hlo-opt/mlir-hlo-opt.cpp",
],
deps = [
":all_passes",
PR #50191: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/50191 DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. Copybara import of the project: -- c4f20a89aed71181e75bcc5265723b88bde23240 by Wenyi Zhao <reyizero@gmail.com>: [MLIR][DISC] Add RAL (Runtime abstraction layer) Dialect DISC is a e2e flow, including both compiler side and runtime side. For runtime side, we have different targeting environments (e.g. tensorflow, pytorch, or sometimes even a standalone binary). In order to simplify the design of the compiler side, we design a Runtime Abstraction Layer (RAL) to sperate the compiler side and runtime side. Thus the compiler side only need to target RAL itself and it is the responsibility of RAL to handle the differences between different targeting environments. One of the most important functions of RAL is to manage stateful resources. To this end, it provides a context object, and hides all stateful operations behind this context, thus the compiler side itself doesn't need to care about the resource initialization. For example, a kernel must be loaded before it can be launched on GPU. However, the loading operation should only be taken once during the whole lifetime of the context in order to achieve the best performance. Based on the initialization-free interfaces provided by RAL, compiler side can focus on its core optimization logic and lets the RAL to manage the resource status. The context mentioned above is passed as a parameter to the entry function and all RAL APIs should always use the context as their first argument. This CR also provides a pass to help to ensure this property. The pass rewrites the entry function to make sure their first argument is the context. For entry function, the pass also rewrites its inputs and outputs. To be concrete, all the original inputs and outputs of the entry function are received from and sent to RAL through a sequence of RAL API calls correspondingly. The motivation behind this is to hide the implementation details of I/Os. This design may also potentially enable partial execution of the compiled module when some of the inputs are ready. -- 1991d4f80ab6087943956e1c0fec4940a22ab08d by Wenyi Zhao <reyizero@gmail.com>: fix PiperOrigin-RevId: 379317586
2021-06-15 02:26:41 +08:00
":disc_ral",
":hlo",
":lhlo",
":lhlo_gpu",
2021-07-12 21:03:29 +08:00
"@llvm-project//llvm:AllTargetsAsmParsers",
"@llvm-project//llvm:AllTargetsCodeGens",
"@llvm-project//llvm:Core",
"@llvm-project//llvm:ExecutionEngine",
"@llvm-project//llvm:Option",
"@llvm-project//llvm:OrcJIT",
"@llvm-project//llvm:Support",
2021-07-12 21:03:29 +08:00
"@llvm-project//llvm:Target",
"@llvm-project//mlir:AllPassesAndDialects",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MlirOptLib",
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Support",
2021-07-12 21:03:29 +08:00
"@llvm-project//mlir:MlirJitRunner",
],
)
# Python library.
td_library(
name = "MhloOpsPyTdFiles",
srcs = [
"@llvm-project//mlir:include/mlir/Bindings/Python/Attributes.td",
],
includes = ["include"],
deps = [
":hlo_ops_td_files",
"@llvm-project//mlir:OpBaseTdFiles",
],
)
gentbl_filegroup(
name = "MhloOpsPyGen",
tbl_outs = [
(
[
"-gen-python-op-bindings",
"-bind-dialect=mhlo",
],
"python/_mhlo_ops_gen.py",
),
],
tblgen = "@llvm-project//mlir:mlir-tblgen",
td_file = "python/MhloOps.td",
deps = [
":MhloOpsPyTdFiles",
],
)
filegroup(
name = "MhloOpsPyFiles",
srcs = [
"python/mhlo.py",
":MhloOpsPyGen",
],
)