From fcf3df1541dd40ffb1228164c660fe12a284f68c Mon Sep 17 00:00:00 2001
From: Mehdi Amini <aminim@google.com>
Date: Wed, 1 Jul 2020 19:18:52 +0000
Subject: [PATCH] Move the HLO/LHLO dialects to a new directory:
 tensorflow/compiler/mlir/hlo

We're preparing to restructure the MLIR HLO ecosystem with 5 dialects:

- chlo: client dialect with explicit broadcast and multiple composite operations
- mhlo: hlo with dynamic shape, decouple from XLA for evolution purpose
- lmhlo: same as above, but after buffer assignment.
- xla_hlo: mapping 1:1 to the XLA HloInstruction class.
- xla_lhlo: same as above, but after buffer assignment.

The first three dialects are intended to live in the new tensorflow/compiler/mlir/hlo
path, the latter two will be created in tensorflow/compiler/mlir/xla.

This patch only moves the directory, will followup with other transformations and tests.

The structure of the new directory follows: https://llvm.discourse.group/t/rfc-canonical-file-paths-to-dialects/621 as we intend to make it a standalone buildable component (see also https://github.com/google/mlir-npcomp as another example).

PiperOrigin-RevId: 319273229
---
 include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h   |   45 +
 include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td  |  370 +++
 include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h    |   99 +
 include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td   | 1390 +++++++++++
 .../mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td  | 1331 +++++++++++
 include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td |   43 +
 .../mhlo/IR/infer_fusibility_op_interface.h   |   28 +
 .../mhlo/IR/infer_fusibility_op_interface.td  |  161 ++
 include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h   |   52 +
 include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td  |  796 +++++++
 include/mlir-hlo/utils/broadcast_utils.h      |   49 +
 include/mlir-hlo/utils/convert_op_folder.h    |   33 +
 include/mlir-hlo/utils/hlo_utils.h            |   74 +
 lib/Dialect/mhlo/IR/chlo_ops.cc               |  278 +++
 lib/Dialect/mhlo/IR/dialect_registration.cc   |   24 +
 lib/Dialect/mhlo/IR/hlo_ops.cc                | 2110 +++++++++++++++++
 .../mhlo/IR/infer_fusibility_op_interface.cc  |   22 +
 lib/Dialect/mhlo/IR/lhlo_ops.cc               |  102 +
 lib/Dialect/mhlo/transforms/canonicalize.td   |   30 +
 lib/utils/broadcast_utils.cc                  |   74 +
 lib/utils/convert_op_folder.cc                |   86 +
 lib/utils/hlo_utils.cc                        |   70 +
 22 files changed, 7267 insertions(+)
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h
 create mode 100644 include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td
 create mode 100644 include/mlir-hlo/utils/broadcast_utils.h
 create mode 100644 include/mlir-hlo/utils/convert_op_folder.h
 create mode 100644 include/mlir-hlo/utils/hlo_utils.h
 create mode 100644 lib/Dialect/mhlo/IR/chlo_ops.cc
 create mode 100644 lib/Dialect/mhlo/IR/dialect_registration.cc
 create mode 100644 lib/Dialect/mhlo/IR/hlo_ops.cc
 create mode 100644 lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc
 create mode 100644 lib/Dialect/mhlo/IR/lhlo_ops.cc
 create mode 100644 lib/Dialect/mhlo/transforms/canonicalize.td
 create mode 100644 lib/utils/broadcast_utils.cc
 create mode 100644 lib/utils/convert_op_folder.cc
 create mode 100644 lib/utils/hlo_utils.cc

diff --git a/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h b/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h
new file mode 100644
index 0000000..222b808
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h
@@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_CHLO_OPS_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_CHLO_OPS_H_
+
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Dialect.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/DialectImplementation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpDefinition.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Types.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/InferTypeOpInterface.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.h"
+
+namespace mlir {
+namespace xla_chlo {
+
+class XlaHloClientDialect : public Dialect {
+ public:
+  explicit XlaHloClientDialect(MLIRContext *context);
+  static StringRef getDialectNamespace() { return "xla_chlo"; }
+};
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h.inc"
+
+}  // namespace xla_chlo
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_IR_CHLO_OPS_H_
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td b/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td
new file mode 100644
index 0000000..4cf48c6
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td
@@ -0,0 +1,370 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Defines "client" aligned HLO ops.
+// These ops are not necessarily orthogonal or optimized for transformation but
+// for ease of expression in certain cases deemed important for client
+// libraries (i.e. implicit broadcasting, helper ops, etc).
+// This dialect is considered to exist in addition to augment the xla_hlo
+// dialect for ergonomic needs, not duplicate/replace it.
+//
+// The typical use of this dialect is for client libraries to be able to emit
+// less constrained ops and rely on the conversion framework to lower any
+// xla_chlo ops to canonical xla_hlo ops.
+//
+// See: https://www.tensorflow.org/xla/operation_semantics
+
+#ifndef CHLO_OPS
+#define CHLO_OPS
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/InferTypeOpInterface.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td"
+
+def HLOClient_Dialect : Dialect {
+  let name = "xla_chlo";
+  let cppNamespace = "xla_chlo";
+  let summary = [{
+    XLA Client HLO Ops
+  }];
+
+  let description = [{
+    This dialect contains ops that align closely with the API surface area
+    of the XlaBuilder C++ API, where such ops have semantics that go beyond
+    what exists in the lower level dialects (such as `xla_hlo`). Essentially,
+    whenever the client library uses syntactic sugar or composition
+    of multiple ops for an API call, this dialect tries to model the API call
+    and provide conversion patterns to fully materialize into lower level
+    dialects.
+  }];
+}
+
+class HLOClient_Op<string mnemonic, list<OpTrait> traits> :
+    Op<HLOClient_Dialect, mnemonic, traits> {
+  // TODO(b/129012527) Much of this custom verification should be expressed as
+  // type constraints.
+  let verifier = [{ return Verify(*this); }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA binary elementwise op definitions.
+// From the client perspective, each of these support both explicit rank
+// broadcasting (via the broadcast_dimensions attribute) and implicit degenerate
+// shape broadcasting.
+//
+// These correspond to operations in the xla_hlo dialect without the
+// "broadcast_" prefix, except that those ops require same-shaped operands and
+// results.
+//
+// See:
+//   https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations
+//   https://www.tensorflow.org/xla/broadcasting
+//===----------------------------------------------------------------------===//
+
+class HLOClient_BroadcastBinaryElementwiseOp<
+  string mnemonic, list<OpTrait> traits> :
+        HLOClient_Op<mnemonic,
+            !listconcat(traits, [
+              DeclareOpInterfaceMethods<InferShapedTypeOpInterface>])> {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    // Explicit rank-broadcast dimension mappings. Defaults to "numpy" prefix
+    // padded rank-broadcast semantics if omitted.
+    OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions
+  );
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Value left, Value  right, "
+    "DenseIntElementsAttr broadcast_dimensions"
+  >];
+
+  let results = (outs HLO_Tensor);
+
+  let assemblyFormat = [{
+    $lhs `,` $rhs attr-dict `:`
+    `(` type($lhs) `,` type($rhs) `)` `->` type(results)
+  }];
+
+  let extraClassDeclaration = [{
+    // TODO(laurenzo): It isn't clear to me why reifyReturnShapes does not
+    // have its declaration generated by DeclareOpInterfaceMethods.
+    LogicalResult reifyReturnTypeShapes(
+         OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes);
+  }];
+}
+
+def HLOClient_BroadcastAddOp : HLOClient_BroadcastBinaryElementwiseOp<"broadcast_add",
+    [Commutative, NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Addition operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs + rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastAtan2Op : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_atan2",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Atan2 operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `atan2(lhs/rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastDivOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_divide",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Division operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs / rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastMaxOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_maximum",
+    [Commutative, NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Maximum operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `max(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastMinOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_minimum",
+    [Commutative, NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Minimum operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `min(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastMulOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_multiply",
+    [Commutative, NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Multiplication operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs * rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastPowOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_power",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Power operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs ^ rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastRemOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_remainder",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Remainder operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs % rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastShiftLeftOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_shift_left",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Shift left operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs << rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastShiftRightArithmeticOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_shift_right_arithmetic",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Shift right arithmetic operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs >> rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastShiftRightLogicalOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_shift_right_logical",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Shift right logical operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs >> rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastSubOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_subtract",
+    [NoSideEffect, SameOperandsAndResultElementType]> {
+  string summary = "Subtraction operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `lhs - rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA binary elementwise op definitions.
+// The same description as the arithmetic binary elementwise ops applies.
+//===----------------------------------------------------------------------===//
+
+class HLOClient_BroadcastBinaryLogicalElementwiseOp<string mnemonic> :
+    HLOClient_BroadcastBinaryElementwiseOp<
+      mnemonic, [Commutative, NoSideEffect]> {
+  let arguments = (ins
+    HLO_PredOrIntTensor:$lhs,
+    HLO_PredOrIntTensor:$rhs,
+    // Explicit rank-broadcast dimension mappings. Defaults to "numpy" prefix
+    // padded rank-broadcast semantics if omitted.
+    OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions
+  );
+}
+
+def HLOClient_BroadcastAndOp: HLOClient_BroadcastBinaryLogicalElementwiseOp<
+    "broadcast_and"> {
+  string summary = "Logical and operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `logical_and(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastOrOp: HLOClient_BroadcastBinaryLogicalElementwiseOp<
+    "broadcast_or"> {
+  string summary = "Logical or operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `logical_or(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+def HLOClient_BroadcastXorOp : HLOClient_BroadcastBinaryLogicalElementwiseOp<
+    "broadcast_xor"> {
+  string summary = "Logical xor operator (with optional broadcasting)";
+
+  string description = [{
+    Returns `logical_xor(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// Broadcasting complex op
+//===----------------------------------------------------------------------===//
+
+def HLOClient_BroadcastComplexOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_complex", [NoSideEffect]> {
+  string summary = "Complex operator (with optional broadcasting)";
+
+  string description = [{
+    Performs element-wise conversion of a pair of real and imaginary values to
+    a complex value.
+  }];
+
+  let arguments = (ins
+    HLO_FpTensor:$lhs,
+    HLO_FpTensor:$rhs,
+    // Explicit rank-broadcast dimension mappings. Defaults to "numpy" prefix
+    // padded rank-broadcast semantics if omitted.
+    OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions
+  );
+  let results = (outs HLO_ComplexTensor);
+}
+
+//===----------------------------------------------------------------------===//
+// Broadcasting compare op
+//===----------------------------------------------------------------------===//
+
+def HLOClient_BroadcastCompareOp : HLOClient_BroadcastBinaryElementwiseOp<
+    "broadcast_compare", [NoSideEffect]> {
+  string summary = "Compare operator (with optional broadcasting)";
+
+  string description = [{
+    Compares `lhs` and `rhs` elementwise according to `comparison_direction`.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_comparison_operations.
+  }];
+
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions,
+    HLO_ComparisonDirectionAttr:$comparison_direction
+  );
+  let results = (outs HLO_PredTensor);
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, "
+    "DenseIntElementsAttr broadcast_dimensions, StringAttr comparison_direction"
+  >];
+}
+
+#endif  // CHLO_OPS
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h
new file mode 100644
index 0000000..b6360b7
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h
@@ -0,0 +1,99 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines the operations used in the XLA dialect.
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_OPS_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_OPS_H_
+
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Dialect.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/DialectImplementation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpDefinition.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Types.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/InferTypeOpInterface.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h"
+
+namespace mlir {
+class OpBuilder;
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.h.inc"
+
+namespace xla_hlo {
+
+class XlaHloDialect : public Dialect {
+ public:
+  explicit XlaHloDialect(MLIRContext *context);
+  static StringRef getDialectNamespace() { return "xla_hlo"; }
+
+  // Registered hook to materialize a constant operation from a given attribute
+  // value with the desired resultant type.
+  Operation *materializeConstant(OpBuilder &builder, Attribute value, Type type,
+                                 Location loc) override;
+
+  // Parses a type registered to this dialect.
+  Type parseType(DialectAsmParser &parser) const override;
+
+  // Prints a type registered to this dialect.
+  void printType(Type type, DialectAsmPrinter &os) const override;
+};
+
+namespace HLOTypes {
+enum Kind {
+  Token = Type::FIRST_XLA_HLO_TYPE,
+};
+}  // namespace HLOTypes
+
+class TokenType : public Type::TypeBase<TokenType, Type, TypeStorage> {
+ public:
+  using Base::Base;
+
+  static TokenType get(MLIRContext *context) {
+    return Base::get(context, HLOTypes::Token);
+  }
+
+  // Support method to enable LLVM-style type casting.
+  static bool kindof(unsigned kind) { return kind == HLOTypes::Token; }
+};
+
+// Shape derivation function that computes the shape of the result based on
+// the first argument. For a 2-dimensional input tensor, this produces IR of
+// the form
+//
+//  %0 = dim %arg0, 0 : memref<?x?xf32>
+//  %1 = index_cast %0 : index to i64
+//  %2 = dim %arg0, 1 : memref<?x?xf32>
+//  %3 = index_cast %2 : index to i64
+//  %4 = "xla_hlo.scalars_to_dimension_tensor"(%1, %3)
+//    : (i64, i64) -> tensor<2xi64>
+//
+// and returns %4 as the shape value.
+LogicalResult deriveShapeFromFirstOperand(
+    OpBuilder *builder, Operation *op,
+    SmallVectorImpl<Value> *reifiedReturnShapes);
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc"
+
+}  // end namespace xla_hlo
+}  // end namespace mlir
+
+#endif  //  TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_OPS_H_
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td
new file mode 100644
index 0000000..97a10d9
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td
@@ -0,0 +1,1390 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the operation definition file for XLA HLO ops which map to the
+// traditional definition in xla_data.proto (or are aligned with the goals
+// thereof).
+// See: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/xla/xla_data.proto
+
+#ifndef HLO_OPS
+#define HLO_OPS
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/InferTypeOpInterface.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_utils.td"
+include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td"
+
+def HLO_Dialect : Dialect {
+  let name = "xla_hlo";
+  let cppNamespace = "xla_hlo";
+}
+
+class HLO_Op<string mnemonic, list<OpTrait> traits> :
+    Op<HLO_Dialect, mnemonic, traits> {
+  // Whether this operation has a custom conversion to HLO or not.
+  bit hasCustomHLOConverter = 0b0;
+
+  // TODO(b/129012527) Much of this custom verification should be expressed as
+  // type constraints.
+  let verifier = [{ return Verify(*this); }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA nullary op definitions.
+//===----------------------------------------------------------------------===//
+
+def HLO_ConstOp : HLO_Op<"constant",
+    [ConstantLike, NoSideEffect, AllTypesMatch<["value", "output"]>]>,
+    BASE_HLO_ConstOp {
+  let arguments = (ins
+    ElementsAttr:$value
+  );
+
+  let results = (outs
+    HLO_Tensor:$output
+  );
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Attribute value"
+  >];
+
+  let printer = [{ return Print(*this, &p); }];
+  let parser = [{ return ParseConstOp(&parser, &result); }];
+
+  let hasFolder = 1;
+
+  // Constant has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_IotaOp : HLO_Op<"iota", [NoSideEffect]>, BASE_HLO_IotaOp {
+  let arguments = (ins I64Attr:$iota_dimension);
+
+  let results = (outs HLO_IntFpOrComplexTensor:$output);
+
+  // TODO(b/130357376): Iota has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_DynamicIotaOp: HLO_Op<"dynamic_iota", [NoSideEffect]> {
+  let summary = "Create linear increasing values from 0 to length -1.";
+  let description = [{
+    Produces an HLO Tensor of the specified shape, with an incremental set of
+    values along the specified dimension starting at 0.
+
+    Requires:
+    - The output length of the tensor result.
+  }];
+
+  let arguments = (ins HLO_DimensionTensor:$output_shape, I64Attr:$iota_dimension);
+  let results = (outs HLO_Tensor:$result);
+
+  let hasCanonicalizer = 1;
+  // Cannot be exported to legacy formats.
+  let hasCustomHLOConverter = 1;
+}
+
+
+def HLO_CreateTokenOp : HLO_Op<"create_token", [NoSideEffect]> {
+  string summary = "Create Token operator";
+
+  string description = [{
+    Produces a HLO token. Tokens are used for ordering side-effecting perations.
+    This is exported to HLO as an AfterAll operation with no operands to
+    generate a token.
+  }];
+
+  let results = (outs HLO_Token:$output);
+}
+
+//===----------------------------------------------------------------------===//
+// XLA unary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions
+
+class HLO_UnaryElementwiseOp<string mnemonic, list<OpTrait> traits,
+      Type TensorType>: HLO_Op<mnemonic,
+        !listconcat(traits, [InferShapedTypeOpInterface, InferFusibilityOpInterface])> {
+    let arguments = (ins TensorType:$operand);
+    let results = (outs TensorType);
+    let extraClassDeclaration = [{
+      static  LogicalResult inferReturnTypeComponents(
+          MLIRContext* context, Optional<Location> location,
+          ValueRange operands, DictionaryAttr attributes, RegionRange regions,
+          SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {
+        return failure();
+      }
+      LogicalResult reifyReturnTypeShapes(
+          OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes) {
+        return deriveShapeFromFirstOperand(&builder, getOperation(),
+                                           &reifiedReturnShapes);
+      }
+      bool inferInputOutputShapeEquality(int input, int output) {
+        return true;
+      }
+      llvm::Optional<Value> inferEffectiveWorkloadShape() {
+        return getOperation()->getResult(0);
+      }
+    }];
+}
+
+// Abs supports complex to real, so element type is not guaranteed to match.
+def HLO_AbsOp: HLO_UnaryElementwiseOp<"abs",
+    [NoSideEffect, SameOperandsAndResultShape],
+     TensorOf<[HLO_SInt, AnyFloat, HLO_Complex]>>, BASE_HLO_AbsOp {
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Value operand"
+  >];
+}
+
+def HLO_CeilOp: HLO_UnaryElementwiseOp<"ceil",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_CeilOp;
+
+def HLO_ConvertOp : HLO_UnaryElementwiseOp<
+    "convert", [NoSideEffect, SameOperandsAndResultShape], HLO_Tensor>,
+    BASE_HLO_ConvertOp {
+
+  let builders = [OpBuilder<
+    "OpBuilder &, OperationState &tblgen_state, Value operand, "
+    "Type result_element_ty"
+  >];
+
+  let hasFolder = 1;
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_ClzOp: HLO_UnaryElementwiseOp<"count_leading_zeros",
+    [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>,
+    BASE_HLO_ClzOp;
+
+def HLO_CosOp: HLO_UnaryElementwiseOp<"cosine",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_CosOp;
+
+def HLO_ExpOp: HLO_UnaryElementwiseOp<"exponential",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_ExpOp;
+
+def HLO_Expm1Op: HLO_UnaryElementwiseOp<"exponential_minus_one",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_Expm1Op;
+
+def HLO_FloorOp: HLO_UnaryElementwiseOp<"floor",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_FloorOp;
+
+def HLO_ImagOp: HLO_Op<
+    "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp {
+  let builders = [OpBuilder<
+    "OpBuilder &, OperationState &tblgen_state, Value val">];
+
+  let arguments = (ins HLO_ComplexTensor);
+  let results = (outs HLO_FpTensor);
+  let hasFolder = 1;
+}
+
+def HLO_IsFiniteOp: HLO_UnaryElementwiseOp<"is_finite",
+    [NoSideEffect, SameOperandsAndResultShape], HLO_Tensor>,
+    BASE_HLO_IsFiniteOp {
+  let arguments = (ins HLO_FpTensor:$x);
+  let results = (outs HLO_PredTensor:$y);
+}
+
+def HLO_LogOp: HLO_UnaryElementwiseOp<"log",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_LogOp;
+
+def HLO_Log1pOp: HLO_UnaryElementwiseOp<"log_plus_one",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_Log1pOp;
+
+def HLO_LogisticOp: HLO_UnaryElementwiseOp<"logistic",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_LogisticOp;
+
+def HLO_NotOp: HLO_UnaryElementwiseOp<"not",
+    [NoSideEffect, SameOperandsAndResultType], HLO_PredOrIntTensor>,
+    BASE_HLO_NotOp;
+
+def HLO_NegOp: HLO_UnaryElementwiseOp<"negate",
+    [NoSideEffect, SameOperandsAndResultType], HLO_IntFpOrComplexTensor>,
+    BASE_HLO_NegOp;
+
+def HLO_PopulationCountOp: HLO_UnaryElementwiseOp<"popcnt",
+    [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>,
+    BASE_HLO_PopulationCountOp;
+
+def HLO_RealOp: HLO_Op<
+    "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp {
+  let builders = [OpBuilder<
+    "OpBuilder &, OperationState &tblgen_state, Value val">];
+
+  let arguments = (ins HLO_ComplexTensor);
+  let results = (outs HLO_FpTensor);
+  let hasFolder = 1;
+}
+
+def HLO_RoundOp: HLO_UnaryElementwiseOp<"round_nearest_afz",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_RoundOp;
+
+def HLO_RsqrtOp: HLO_UnaryElementwiseOp<"rsqrt",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_RsqrtOp;
+
+def HLO_SignOp: HLO_UnaryElementwiseOp<"sign",
+    [NoSideEffect, SameOperandsAndResultType],
+    TensorOf<[HLO_SInt, AnyFloat, HLO_Complex]>>,
+    BASE_HLO_SignOp;
+
+def HLO_SinOp: HLO_UnaryElementwiseOp<"sine",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_SinOp;
+
+def HLO_SqrtOp: HLO_UnaryElementwiseOp<"sqrt",
+    [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>,
+    BASE_HLO_SqrtOp;
+
+def HLO_TanhOp: HLO_UnaryElementwiseOp<"tanh",
+    [NoSideEffect, SameOperandsAndResultType],
+    HLO_FpOrComplexTensor>, BASE_HLO_TanhOp;
+
+//===----------------------------------------------------------------------===//
+// XLA binary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations
+
+class HLO_BinaryElementwiseOp<string mnemonic, list<OpTrait> traits> :
+        HLO_Op<mnemonic, !listconcat(traits, [InferShapedTypeOpInterface, InferFusibilityOpInterface])> {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs
+  );
+
+  let extraClassDeclaration = [{
+    static  LogicalResult inferReturnTypeComponents(
+        MLIRContext* context, Optional<Location> location, ValueRange operands,
+        DictionaryAttr attributes, RegionRange regions,
+        SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {
+      return failure();
+    }
+    LogicalResult reifyReturnTypeShapes(
+        OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes) {
+      return deriveShapeFromFirstOperand(&builder, getOperation(),
+                                         &reifiedReturnShapes);
+    }
+    bool inferInputsShapeEquality(int lhs, int rhs) {
+      return true;
+    }
+    bool inferInputOutputShapeEquality(int input, int output) {
+      return true;
+    }
+    llvm::Optional<Value> inferEffectiveWorkloadShape() {
+      return getOperation()->getResult(0);
+    }
+  }];
+
+  let results = (outs HLO_Tensor);
+  let parser = [{ return mlir::impl::parseOneResultSameOperandTypeOp(parser, result); }];
+  let printer = [{ return mlir::impl::printOneResultOp(getOperation(), p); }];
+}
+
+def HLO_AddOp : HLO_BinaryElementwiseOp<"add",
+      [Commutative, NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_AddOp {
+  let hasFolder = 1;
+}
+
+def HLO_Atan2Op : HLO_BinaryElementwiseOp<"atan2",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_Atan2Op;
+
+def HLO_ComplexOp: HLO_Op<"complex",
+    [NoSideEffect, SameOperandsAndResultShape]>,
+    BASE_HLO_ComplexOp {
+  let builders = [OpBuilder<
+    "OpBuilder &, OperationState &tblgen_state, Value lhs, Value rhs">];
+
+  let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs);
+  let results = (outs HLO_ComplexTensor);
+  let hasFolder = 1;
+}
+
+def HLO_DivOp : HLO_BinaryElementwiseOp<"divide",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_DivOp {
+  let hasFolder = 1;
+}
+
+def HLO_MaxOp : HLO_BinaryElementwiseOp<"maximum",
+      [Commutative, NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_MaxOp {
+  let hasFolder = 1;
+}
+
+def HLO_MinOp : HLO_BinaryElementwiseOp<"minimum",
+      [Commutative, NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_MinOp {
+  let hasFolder = 1;
+}
+
+def HLO_MulOp : HLO_BinaryElementwiseOp<"multiply",
+      [Commutative, NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_MulOp {
+  let hasFolder = 1;
+}
+
+def HLO_PowOp : HLO_BinaryElementwiseOp<"power",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_PowOp;
+
+def HLO_RemOp : HLO_BinaryElementwiseOp<"remainder",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_RemOp;
+
+def HLO_ShiftLeftOp : HLO_BinaryElementwiseOp<"shift_left",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_ShiftLeftOp;
+
+def HLO_ShiftRightArithmeticOp : HLO_BinaryElementwiseOp<"shift_right_arithmetic",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_ShiftRightArithmeticOp;
+
+def HLO_ShiftRightLogicalOp : HLO_BinaryElementwiseOp<"shift_right_logical",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_ShiftRightLogicalOp;
+
+def HLO_SubOp : HLO_BinaryElementwiseOp<"subtract",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_SubOp {
+  let hasFolder = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// XLA binary logical elementwise op definitions.
+//===----------------------------------------------------------------------===//
+
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations
+class HLO_BinaryLogicalElementwiseOp<string mnemonic> :
+        HLO_BinaryElementwiseOp<
+            mnemonic, [Commutative, NoSideEffect, SameOperandsAndResultType]> {
+  let arguments = (ins
+    HLO_PredOrIntTensor:$lhs,
+    HLO_PredOrIntTensor:$rhs
+  );
+}
+
+def HLO_AndOp: HLO_BinaryLogicalElementwiseOp<"and">, BASE_HLO_AndOp;
+def HLO_OrOp: HLO_BinaryLogicalElementwiseOp<"or">, BASE_HLO_OrOp;
+def HLO_XorOp : HLO_BinaryLogicalElementwiseOp<"xor">, BASE_HLO_XorOp;
+
+//===----------------------------------------------------------------------===//
+// XLA communication op definitions.
+//===----------------------------------------------------------------------===//
+
+// InfeedOp corresponds to 'InfeedWithToken' xla client API and not 'Infeed'.
+// InfeedWithToken allows ordering of infeed HLO instructions using tokens.
+def HLO_InfeedOp : HLO_Op<"infeed", []> {
+
+  string summary = "Infeed operator";
+
+  string description = [{
+    Reads a single data item from the implicit Infeed streaming interface of
+    the device, interpreting the data as the given shape, and returns a XlaOp
+    of the data. Multiple Infeed operations are allowed in a computation, but
+    there must be a total order among the Infeed operations.
+
+    See https://www.tensorflow.org/xla/operation_semantics#infeed.
+  }];
+
+  let arguments = (ins
+    HLO_Token:$token,
+    DefaultValuedAttr<StrAttr, "">:$infeed_config
+  );
+  let results = (outs HLO_Tuple);
+  let hasCustomHLOConverter = 1;
+}
+
+// OutfeedOp corresponds to 'OutfeedWithToken' xla client API and not 'Outfeed'.
+// OutfeedWithToken allows ordering of outfeed HLO instructions using tokens.
+def HLO_OutfeedOp : HLO_Op<"outfeed", []> {
+
+  string summary = "Outfeed operator";
+
+  string description = [{
+    Generates outgoing data transfers for the given data. It takes data and a
+    token type operand and produces a token type value. Tokens are used for
+    ordering side-effecting operations.
+
+    See https://www.tensorflow.org/xla/operation_semantics#outfeed.
+  }];
+
+  let arguments = (ins
+    HLO_TensorOrTuple:$operand,
+    HLO_Token:$token,
+    DefaultValuedAttr<StrAttr, "">:$outfeed_config
+  );
+  let results = (outs HLO_Token);
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_SendOp : HLO_Op<"send", []> {
+
+  string summary = "Send operator";
+
+  string description = [{
+    Sends the given operand data to a Recv instruction in another computation
+    that shares the same channel handle. Does not return any data. Similar to
+    the Recv operation, Send operation represents synchronous communication,
+    and is internally decomposed into 2 HLO instructions (Send and SendDone) to
+    enable asynchronous data transfers.
+
+    See https://www.tensorflow.org/xla/operation_semantics#send.
+  }];
+
+  let arguments = (ins
+    HLO_TensorOrTuple:$operand,
+    HLO_Token:$token,
+    ChannelHandle<HLO_Dialect>:$channel_id,
+    DefaultValuedAttr<BoolAttr, "false">:$is_host_transfer
+  );
+
+  let results = (outs HLO_Token);
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_RecvOp : HLO_Op<"recv", []> {
+
+  string summary = "Recv operator";
+
+  string description = [{
+    Receives data of the given shape from a Send instruction in another
+    computation that shares the same channel handle. Returns a tuple containing
+    value for the received data and a token. Recv operation represents
+    synchronous communication. However, the instruction is internally decomposed
+    into 2 HLO instructions (Recv and RecvDone) to enable asynchronous data
+    transfers.
+
+    See https://www.tensorflow.org/xla/operation_semantics#recv.
+  }];
+
+  let arguments = (ins
+    HLO_Token:$token,
+    ChannelHandle<HLO_Dialect>:$channel_id,
+    DefaultValuedAttr<BoolAttr, "false">:$is_host_transfer
+  );
+
+  let results = (outs HLO_Tuple);
+  let hasCustomHLOConverter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// XLA parallelism related op definitions.
+//===----------------------------------------------------------------------===//
+
+def HLO_ReplicaIdOp : HLO_Op<"replica_id", [NoSideEffect]>,
+      BASE_HLO_ReplicaIdOp {
+  // TODO(prakalps): The output should unsigned 32-bit integer but mlir does
+  // not differentiate between signed and unsigned int.
+  let results = (outs I32Tensor);
+}
+
+//===----------------------------------------------------------------------===//
+// XLA control flow op definitions.
+//===----------------------------------------------------------------------===//
+
+def HLO_AfterAllOp : HLO_Op<"after_all", [NoSideEffect]> {
+
+  string summary = "AfterAll operator";
+
+  string description = [{
+    AfterAll takes a variadic number of tokens and produces a single token.
+    Tokens are primitive types which can be threaded between side-effecting
+    operations to enforce ordering. AfterAll can be used as a join of tokens
+    for ordering a operation after a set operations.
+
+    See https://www.tensorflow.org/xla/operation_semantics#afterall.
+  }];
+
+  let arguments = (ins Variadic<HLO_Token>:$operands);
+  let results = (outs HLO_Token);
+}
+
+// Xla Client API has two separate calls for indexed and predicated conditional,
+// although both eventually map to kConditional HLO. IfOp maps to predicated
+// conditional use of kConditional HLO.
+def HLO_IfOp: HLO_Op<"if", [RecursiveSideEffects]> {
+  string summary = "If operator";
+
+  string description = [{
+    Returns the result of executing either a true or false function depending on
+    the result of a condition function.
+
+    See https://www.tensorflow.org/xla/operation_semantics#conditional.
+  }];
+
+  let arguments = (ins
+    HLO_PredTensor:$pred,
+    HLO_TensorOrTuple:$true_arg,
+    HLO_TensorOrTuple:$false_arg
+  );
+
+  let regions = (region AnyRegion:$true_branch,
+                        AnyRegion:$false_branch);
+
+  let results = (outs HLO_TensorOrTuple);
+
+  // TODO(b/129422361): ConditionalOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+// Xla Client API has two separate calls for indexed and predicated conditional,
+// although both eventually map to kConditional HLO. CaseOp maps to indexed
+// conditional use of kConditional HLO.
+def HLO_CaseOp: HLO_Op<"case", [RecursiveSideEffects]>,
+      BASE_HLO_CaseOp {
+
+  let arguments = (ins
+    I32Tensor:$index,
+    Variadic<HLO_TensorOrTuple>:$branch_operands
+  );
+
+  let regions = (region VariadicRegion<AnyRegion>:$branches);
+
+  let results = (outs Variadic<HLO_TensorOrTuple>);
+
+  let hasCustomHLOConverter = 1;
+}
+
+
+def HLO_WhileOp: HLO_Op<"while", [RecursiveSideEffects,
+                                  SameOperandsAndResultType]>,
+                 BASE_HLO_WhileOp {
+  let arguments = (ins HLO_TensorOrTuple:$val);
+
+  let regions = (region AnyRegion:$cond, AnyRegion:$body);
+
+  let results = (outs HLO_TensorOrTuple);
+
+  // TODO(b/129422361): WhileOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_AllReduceOp : HLO_Op<"all_reduce",
+    [SameOperandsAndResultType]>, BASE_HLO_AllReduceOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$replica_groups,
+    OptionalAttr<ChannelHandle<HLO_Dialect>>:$channel_id
+  );
+  let regions = (region SizedRegion<1>:$computation);
+  let results = (outs HLO_Tensor);
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_AllToAllOp : HLO_Op<"all_to_all",
+    [NoSideEffect, SameOperandsElementType, SameOperandsShape]>, BASE_HLO_AllToAllOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64Attr:$split_dimension,
+    I64Attr:$concat_dimension,
+    I64Attr:$split_count,
+    I64ElementsAttr:$replica_groups
+  );
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_ReduceOp: HLO_Op<"reduce", [
+      RecursiveSideEffects,
+      SameVariadicOperandSize,
+      SingleBlockImplicitTerminator<"ReturnOp">,
+      InferFusibilityOpInterface
+    ]>, BASE_HLO_ReduceOp {
+  let arguments = (ins
+    Variadic<HLO_TensorOrTuple>:$operands,
+    Variadic<HLO_TensorOrTuple>:$init_values,
+    I64ElementsAttr:$dimensions
+  );
+
+  let results = (outs Variadic<HLO_TensorOrTuple>);
+
+  let builders = [OpBuilder<
+    "OpBuilder &, OperationState &state, ValueRange operands, "
+    "ValueRange init_values, DenseIntElementsAttr dimensions"
+  >];
+
+  let extraClassDeclaration = [{
+    bool isFusibleWithConsumer() {
+      return false;
+    }
+    llvm::Optional<Value> inferEffectiveWorkloadShape() {
+      return getOperation()->getOperand(0);
+    }
+  }];
+
+  let hasFolder = 1;
+
+  // TODO(hinsu): Verify that the attached body arguments and results are
+  // compatible with reduce op's operands.
+  let regions = (region SizedRegion<1>:$body);
+
+  // TODO(hinsu): Implement custom printer and parser.
+
+  // TODO(b/129422361): ReduceOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// XLA tuple op definitions.
+//===----------------------------------------------------------------------===//
+def HLO_GetTupleElementOp: HLO_Op<"get_tuple_element", [NoSideEffect]>, BASE_HLO_GetTupleElementOp {
+  let arguments = (ins
+    HLO_Tuple,
+    I32Attr:$index
+  );
+
+  let results = (outs HLO_TensorOrTokenOrTuple);
+
+  let hasFolder = 1;
+
+  let builders = [OpBuilder<
+                  "OpBuilder &builder, OperationState &results, "
+                  "Value  value, int32_t index">];
+}
+
+def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp {
+  let arguments = (ins Variadic<HLO_TensorOrTuple>:$val);
+  let results = (outs HLO_Tuple);
+
+  let builders = [OpBuilder<
+                  "OpBuilder &builder, OperationState &results, "
+                  "ValueRange values">];
+
+}
+
+def HLO_CompareOp: HLO_Op<"compare",
+      [NoSideEffect, SameTypeOperands, SameOperandsAndResultShape]>,
+      BASE_HLO_CompareOp {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    HLO_ComparisonDirectionAttr:$comparison_direction
+  );
+  let results = (outs HLO_PredTensor);
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, "
+    "StringAttr comparison_direction"
+  >];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Slice definitions.
+//===----------------------------------------------------------------------===//
+
+def HLO_SliceOp: HLO_Op<
+      "slice",
+      [NoSideEffect, SameOperandsAndResultElementType,
+       AllTypesMatch<["start_indices", "limit_indices", "strides"]>]> {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$start_indices,
+    I64ElementsAttr:$limit_indices,
+    I64ElementsAttr:$strides
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, Value operand, "
+    "DenseIntElementsAttr start_indices, DenseIntElementsAttr limit_indices, "
+    "DenseIntElementsAttr strides"
+  >];
+
+  let extraClassDeclaration = [{
+    // Infers output type for given operand and attributes. Result type is
+    // unranked if any of the attributes is illegal.
+    static Type InferOutputTypes(Builder *builder, Value operand,
+                                 DenseIntElementsAttr start_indices,
+                                 DenseIntElementsAttr limit_indices,
+                                 DenseIntElementsAttr strides);
+  }];
+}
+
+def HLO_DynamicSliceOp: HLO_Op<"dynamic-slice",
+      [NoSideEffect, AllElementTypesMatch<["operand", "result"]>]> {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    Variadic<HLO_ScalarIntTensor>:$start_indices,
+    I64ElementsAttr:$slice_sizes
+  );
+
+  let results = (outs HLO_Tensor:$result);
+  let hasCanonicalizer = 1;
+}
+
+def HLO_DynamicUpdateSliceOp: HLO_Op<"dynamic-update-slice",
+      [NoSideEffect, AllElementTypesMatch<["operand", "update", "result"]>,
+       AllShapesMatch<["operand", "result"]>]> {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$update,
+    Variadic<HLO_ScalarIntTensor>:$start_indices
+  );
+
+  let results = (outs HLO_Tensor:$result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XLA Other op definitions.
+//===----------------------------------------------------------------------===//
+
+def HLO_BatchNormGradOp : HLO_Op<"batch_norm_grad", [NoSideEffect]>,
+    BASE_HLO_BatchNormGradOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$scale,
+    HLO_Tensor:$mean,
+    HLO_Tensor:$variance,
+    HLO_Tensor:$grad_output,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+
+  let results = (outs HLO_Tuple);
+}
+
+def HLO_BatchNormInferenceOp : HLO_Op<"batch_norm_inference",
+    [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_BatchNormInferenceOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$scale,
+    HLO_Tensor:$offset,
+    HLO_Tensor:$mean,
+    HLO_Tensor:$variance,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_BatchNormTrainingOp : HLO_Op<"batch_norm_training", [NoSideEffect]>,
+    BASE_HLO_BatchNormTrainingOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$scale,
+    HLO_Tensor:$offset,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+
+  let results = (outs HLO_Tuple);
+}
+
+def HLO_BitcastConvertOp : HLO_Op<"bitcast_convert",
+    [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_BitcastConvertOp {
+
+  let arguments = (ins HLO_Tensor:$operand);
+  let results = (outs HLO_Tensor);
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_BroadcastOp : HLO_Op<"broadcast",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_BroadcastOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$broadcast_sizes
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_BroadcastInDimOp : HLO_Op<"broadcast_in_dim",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_BroadcastInDimOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    BroadcastDimAttr:$broadcast_dimensions
+  );
+
+  let results = (outs HLO_StaticShapeTensor);
+
+  let hasFolder = 1;
+  // Only handles a static subset of the legacy format.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_DynamicBroadcastInDimOp : HLO_Op<"dynamic_broadcast_in_dim",
+      [NoSideEffect]> {
+  string summary = "Broadcast a tensor into the given dynamic shape by adding dimensions.";
+  string description = [{
+    This is a generalization of the BroadcastInDimOp which accepts its output
+    dimensions as an argument. It should eventually supercede the statically
+    shaped original, but is being phased as a separate op in order to support
+    compatibility with lowerings and translations that precede dynamic
+    shapes.
+  }];
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_DimensionTensor:$output_dimensions,
+    BroadcastDimAttr:$broadcast_dimensions
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let hasCanonicalizer = 1;
+  // Cannot be exported to legacy formats.
+  let hasCustomHLOConverter = 1;
+}
+
+// Note: There is no HLO_CallOp because the standard call operation mlir::CallOp
+// is used instead. A mlir::CallOp is exported to a HLO call instruction
+// directly.
+
+def HLO_CholeskyOp : HLO_Op<"cholesky",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_CholeskyOp {
+  let arguments = (ins
+    HLO_FpOrComplexTensor:$a,
+    DefaultValuedAttr<BoolAttr, "false">:$lower
+  );
+
+  let results = (outs HLO_FpOrComplexTensor);
+}
+
+def HLO_ClampOp : HLO_Op<"clamp",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_ClampOp {
+  let arguments = (ins
+    HLO_Tensor:$min,
+    HLO_Tensor:$operand,
+    HLO_Tensor:$max
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_ConcatenateOp : HLO_Op<"concatenate",
+    [NoSideEffect, SameOperandsAndResultElementType, DeclareOpInterfaceMethods<InferTypeOpInterface>]>, BASE_HLO_ConcatenateOp {
+
+  let arguments = (ins
+    Variadic<HLO_Tensor>:$val,
+    I64Attr: $dimension
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+
+}
+
+def HLO_CollectivePermuteOp: HLO_Op<"collective_permute",
+    [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_CollectivePermuteOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$source_target_pairs
+  );
+  let results = (outs HLO_Tensor);
+}
+
+// TODO(hinsu): Make this struct dialect independent so that it can be shared
+// between HLO and LHLO dialect.
+def ConvDimensionNumbers : StructAttr<"ConvDimensionNumbers", HLO_Dialect, [
+  StructFieldAttr<"input_batch_dimension",I64Attr>,
+  StructFieldAttr<"input_feature_dimension", I64Attr>,
+  StructFieldAttr<"input_spatial_dimensions", I64ElementsAttr>,
+  StructFieldAttr<"kernel_input_feature_dimension", I64Attr>,
+  StructFieldAttr<"kernel_output_feature_dimension", I64Attr>,
+  StructFieldAttr<"kernel_spatial_dimensions", I64ElementsAttr>,
+  StructFieldAttr<"output_batch_dimension", I64Attr>,
+  StructFieldAttr<"output_feature_dimension", I64Attr>,
+  StructFieldAttr<"output_spatial_dimensions", I64ElementsAttr>] > {
+
+  let description = "Structure of dimension information for conv op";
+}
+
+def HLO_ConvOp : HLO_Op<"convolution", [NoSideEffect]>, BASE_HLO_ConvOp {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    // Default value: zero for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$padding,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$lhs_dilation,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$rhs_dilation,
+    ConvDimensionNumbers:$dimension_numbers,
+    I64Attr:$feature_group_count,
+    I64Attr:$batch_group_count,
+    HLO_PrecisionConfigAttr:$precision_config
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_CopyOp: HLO_Op<"copy", [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_CopyOp {
+  let arguments = (ins HLO_Tensor);
+  let results = (outs HLO_Tensor);
+  let hasFolder = 1;
+}
+
+def HLO_CrossReplicaSumOp : HLO_Op<"cross-replica-sum",
+    [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_CrossReplicaSumOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$replica_groups
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_CustomCallOp: HLO_Op<"custom_call", []>, BASE_HLO_CustomCallOp {
+  let arguments = (ins
+    Variadic<HLO_Tensor>:$args,
+    StrAttr:$call_target_name,
+    DefaultValuedAttr<BoolAttr, "false">:$has_side_effect,
+    DefaultValuedAttr<StrAttr, "">:$backend_config
+  );
+  let results = (outs HLO_Tensor);
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_DotOp: HLO_Op<"dot", [NoSideEffect]>, BASE_HLO_DotOp {
+  let arguments = (
+    ins HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    HLO_PrecisionConfigAttr:$precision_config
+  );
+  let results = (outs HLO_Tensor);
+}
+
+def DotDimensionNumbers : StructAttr<"DotDimensionNumbers", HLO_Dialect, [
+                StructFieldAttr<"lhs_batching_dimensions",   I64ElementsAttr>,
+                StructFieldAttr<"rhs_batching_dimensions",   I64ElementsAttr>,
+                StructFieldAttr<"lhs_contracting_dimensions", I64ElementsAttr>,
+                StructFieldAttr<"rhs_contracting_dimensions", I64ElementsAttr>
+  ]> {
+  let description = "Structure of dimension information for dot product";
+}
+
+def HLO_DotGeneralOp: HLO_Op<"dot_general", [NoSideEffect]>, BASE_HLO_DotGeneralOp {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    DotDimensionNumbers:$dot_dimension_numbers,
+    HLO_PrecisionConfigAttr:$precision_config
+  );
+
+  let results = (outs HLO_Tensor);
+  let verifier = [{ return Verify(*this); }];
+}
+
+// Define Base Einsum op within the HLO dialect as these are client ops and
+// therefore this class is not common between HLO and LHLO ops.
+class BASE_EinsumOp {
+  string summary = "Einsum operator";
+
+  string description = [{
+    Returns a tensor whose elements are defined by equation, which is written
+    in a shorthand form inspired by the Einstein summation convention.
+  }];
+}
+
+def HLO_EinsumOp: HLO_Op<"einsum", [NoSideEffect]>, BASE_EinsumOp {
+  let arguments = (ins
+    HLO_Tensor:$lhs,
+    HLO_Tensor:$rhs,
+    StrAttr:$einsum_config
+  );
+
+  let results = (outs HLO_Tensor);
+
+  // TODO(hinsu): Canonicalize to lower this client side HLO op to server
+  // side HLO ops.
+}
+
+def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]>, BASE_EinsumOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    StrAttr:$einsum_config
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let hasCanonicalizer = 1;
+
+  // UnaryEinsumOp is unconditionally canonicalized to the binary EinsumOp so
+  // the HLO converter shouldn't be invoked.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_FftOp: HLO_Op<"fft", [NoSideEffect]>, BASE_HLO_FftOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_FftTypeAttr: $fft_type,
+    I64ElementsAttr:$fft_length
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def GatherDimensionNumbers : StructAttr<"GatherDimensionNumbers", HLO_Dialect,
+      [StructFieldAttr<"offset_dims", I64ElementsAttr>,
+      StructFieldAttr<"collapsed_slice_dims", I64ElementsAttr>,
+      StructFieldAttr<"start_index_map", I64ElementsAttr>,
+      StructFieldAttr<"index_vector_dim", I64Attr>]> {
+  let description = "Structure of dimension information for gather";
+}
+
+def HLO_GatherOp: HLO_Op<"gather", [NoSideEffect]>, BASE_HLO_GatherOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_IntTensor:$start_indices,
+    GatherDimensionNumbers:$dimension_numbers,
+    I64ElementsAttr:$slice_sizes,
+    DefaultValuedAttr<BoolAttr, "false">:$indices_are_sorted
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_GetDimensionSizeOp: HLO_Op<"get_dimension_size", [NoSideEffect]>,
+      BASE_HLO_GetDimensionSizeOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I32Attr:$dimension
+  );
+  let results = (outs HLO_IntTensor);
+}
+
+def HLO_MapOp: HLO_Op<"map",
+      [RecursiveSideEffects, SameOperandsElementType,
+       SameOperandsAndResultShape, SingleBlockImplicitTerminator<"ReturnOp">]>,
+      BASE_HLO_MapOp {
+  let arguments = (ins
+    Variadic<HLO_Tensor>:$operands,
+    I64ElementsAttr:$dimensions
+  );
+  let regions = (region SizedRegion<1>:$computation);
+  let results = (outs HLO_Tensor);
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_ReshapeOp: HLO_Op<"reshape",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_ReshapeOp {
+  let arguments = (ins HLO_Tensor:$operand);
+
+  let results = (outs HLO_StaticShapeTensor);
+  let hasFolder = 1;
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_DynamicReshapeOp: HLO_Op<"dynamic_reshape", [NoSideEffect]> {
+  let summary = "Reshape a tensor to a given, possibly dynamic, shape.";
+  let description = [{
+    Reshapes `operand` to `output_shape`.
+
+    Requires:
+    - The length of `output_shape` is equal to the rank of `result`.
+    - The number of elements in `operand` (that is, the product of extents of
+      its shape) is equal to the number of elements in `output_shape` (that is,
+      the product of values in `output_shape`).
+  }];
+
+  let arguments = (ins HLO_Tensor:$operand, HLO_DimensionTensor:$output_shape);
+  let results = (outs HLO_Tensor:$result);
+
+  let hasCanonicalizer = 1;
+  // Cannot be exported to legacy formats.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_ScatterOp: HLO_Op<"scatter", [RecursiveSideEffects]>,
+      BASE_HLO_ScatterOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$scatter_indices,
+    HLO_Tensor:$updates,
+    ScatterDimensionNumbers<HLO_Dialect>:$scatter_dimension_numbers,
+    DefaultValuedAttr<BoolAttr, "false">:$indices_are_sorted,
+    DefaultValuedAttr<BoolAttr, "false">:$unique_indices
+  );
+
+  let regions = (region SizedRegion<1>:$update_computation);
+
+  let results = (outs HLO_Tensor);
+
+  let hasCustomHLOConverter = 1;
+}
+
+// TODO(jpienaar): Add broadcastable trait.
+def HLO_SelectOp: HLO_Op<"select", [NoSideEffect, DeclareOpInterfaceMethods<InferTypeOpInterface>]>, BASE_HLO_SelectOp {
+  let arguments = (ins
+    HLO_PredTensor:$pred,
+    HLO_Tensor:$on_true,
+    HLO_Tensor:$on_false
+  );
+
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_SelectAndScatterOp: HLO_Op<"select_and_scatter",
+      [RecursiveSideEffects]>, BASE_HLO_SelectAndScatterOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$source,
+    HLO_Tensor:$init_value,
+    OptionalAttr<I64ElementsAttr>:$window_dimensions,
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    OptionalAttr<I64ElementsAttr>:$padding
+  );
+
+  let regions = (region SizedRegion<1>:$select, SizedRegion<1>:$scatter);
+
+  let results = (outs HLO_Tensor);
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_SetDimensionSizeOp: HLO_Op<"set_dimension_size", [NoSideEffect]>,
+      BASE_HLO_SetDimensionSizeOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I32Tensor:$size,
+    I32Attr:$dimension
+  );
+  let results = (outs HLO_Tensor);
+}
+
+def HLO_SortOp : HLO_Op<"sort", [RecursiveSideEffects]>, BASE_HLO_SortOp {
+  let arguments = (ins
+    Variadic<HLO_Tensor>:$operands,
+    DefaultValuedAttr<I64Attr, "-1">:$dimension,
+    DefaultValuedAttr<BoolAttr, "false">:$is_stable
+  );
+
+  let results = (outs HLO_TensorOrTuple);
+
+  let regions = (region SizedRegion<1>:$comparator);
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &state, ValueRange operands, "
+    "int64_t dimension = -1, bool is_stable = false"
+  >];
+
+  // TODO(b/129422361): SortOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_ReverseOp: HLO_Op<"reverse",
+      [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_ReverseOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$dimensions
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let hasFolder = 1;
+}
+
+def HLO_PadOp: HLO_Op<"pad",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_PadOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$padding_value,
+    I64ElementsAttr: $edge_padding_low,
+    I64ElementsAttr: $edge_padding_high,
+    I64ElementsAttr: $interior_padding
+  );
+
+  let results = (outs HLO_Tensor);
+
+  let description = [{
+    Pads the `operand` according to TBD.
+  }];
+
+  // TODO(b/129422361): PadOp has a custom constructor for HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_TraceOp: HLO_Op<"trace", []>, BASE_HLO_TraceOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    StrAttr:$tag
+  );
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_TransposeOp: HLO_Op<"transpose",
+      [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_TransposeOp {
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$permutation
+  );
+  let results = (outs HLO_Tensor);
+
+  let hasFolder = 1;
+}
+
+def HLO_TriangularSolveOp: HLO_Op<"triangular_solve",
+    [NoSideEffect, SameOperandsAndResultElementType]>,
+    BASE_HLO_TriangularSolveOp {
+  let arguments = (ins
+    HLO_FpOrComplexTensor:$a,
+    HLO_FpOrComplexTensor:$b,
+    BoolAttr:$left_side,
+    BoolAttr:$lower,
+    BoolAttr:$unit_diagonal,
+    HLO_TransposeAttr:$transpose_a
+  );
+  let results = (outs HLO_FpOrComplexTensor);
+}
+
+def HLO_ReduceWindowOp: HLO_Op<"reduce_window", [
+      RecursiveSideEffects,
+      SingleBlockImplicitTerminator<"ReturnOp">
+    ]>, BASE_HLO_ReduceWindowOp {
+
+  // TODO(hinsu): Verify that padding attribute is 2-d and the remaining
+  // attributes are 1-d. Attributes' leading dimension should match rank of the
+  // inputs.
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    HLO_Tensor:$init_value,
+    I64ElementsAttr:$window_dimensions,
+    // If strides or dilations attributes are missing then the default value is
+    // one for each of the input dimensions. Similarly, padding values are zero
+    // for both low and high in each of the dimensions, if not specified.
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    OptionalAttr<I64ElementsAttr>:$base_dilations,
+    OptionalAttr<I64ElementsAttr>:$window_dilations,
+    OptionalAttr<I64ElementsAttr>:$padding
+  );
+
+  let results = (outs HLO_Tensor);
+
+  // TODO(hinsu): Verify that the attached body arguments and results are
+  // compatible with reduce op's operands.
+  let regions = (region SizedRegion<1>:$body);
+
+  let hasCustomHLOConverter = 1;
+
+  // TODO(hinsu): Implement custom printer and parser.
+}
+
+def HLO_ReturnOp : HLO_Op<"return", [NoSideEffect, Terminator]> {
+  let summary = [{
+    The `hlo.return` operation terminates a region and returns values.
+  }];
+
+  let arguments = (ins
+    Variadic<HLO_TensorOrTuple>:$results
+  );
+
+  // Disable conversion operator for return op as the op is not an actual XLA
+  // instruction and is only used as a terminator for regions.
+  let hasCustomHLOConverter = 1;
+
+  // TODO(hinsu): Implement custom printer and parser.
+}
+
+def HLO_TorchIndexSelectOp : HLO_Op<"torch_index_select", [NoSideEffect]> {
+  let arguments = (ins
+    HLO_Tensor:$input,
+    HLO_Tensor:$index,
+    I64Attr:$dim,
+    I64Attr:$batch_dims
+  );
+
+  let results = (outs HLO_Tensor);
+
+  // TODO(hinsu): Canonicalize to lower this client side HLO op to server
+  // side HLO ops.
+}
+
+//===----------------------------------------------------------------------===//
+// XLA RngUniform Operator.
+//===----------------------------------------------------------------------===//
+def HLO_RngUniformOp : HLO_Op<"rng_uniform", []>, BASE_HLO_RngUniformOp {
+  let arguments = (ins
+    HLO_PredIntOrFpTensor:$a,
+    HLO_PredIntOrFpTensor:$b,
+    I64Tensor:$shape
+  );
+
+  let results = (outs HLO_PredIntOrFpTensor);
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_RngNormalOp : HLO_Op<"rng_normal", []>, BASE_HLO_RngNormalOp {
+  let arguments = (ins
+    HLO_FpTensor:$mu,
+    HLO_FpTensor:$sigma,
+    I64Tensor:$shape
+  );
+
+  let results = (outs HLO_FpTensor);
+
+  let hasCustomHLOConverter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Quantize Operator.
+//===----------------------------------------------------------------------===//
+def HLO_DequantizeOp : HLO_Op<"dequantize", [NoSideEffect]>,
+    BASE_HLO_DequantizeOp {
+  let arguments = (ins
+    TensorOf<[I32]>:$input,
+    F32Attr:$min_range,
+    F32Attr:$max_range,
+    HLO_DequantizeModeAttr:$mode,
+    BoolAttr:$transpose_output,
+    DefaultValuedAttr<BoolAttr, "false">:$is_16bits
+  );
+
+  let results = (outs TensorOf<[BF16]>:$output);
+
+  let hasCustomHLOConverter = 1;
+}
+
+def HLO_FusionOp : HLO_Op<"fusion", []> {
+  let summary = "Fusion operator";
+  let description = [{
+    Models the fusion instruction.
+
+    A fusion op is consists of a group of basic ops (represented as a region
+    attached to it). It serves as a hint to the backend that it is beneficial
+    to emit the contained ops into a single loop nest or kernel.
+  }];
+  let regions = (region SizedRegion<1>:$fused_computation);
+
+  let arguments = (ins
+    Variadic<HLO_TensorOrTuple>:$operands
+  );
+
+  let results = (outs
+    Variadic<HLO_TensorOrTuple>:$results
+  );
+
+  // FusionOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
+#endif // HLO_OPS
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td
new file mode 100644
index 0000000..98a0de3
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td
@@ -0,0 +1,1331 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef HLO_OPS_BASE
+#define HLO_OPS_BASE
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+
+def HLO_Pred : TypeAlias<I1, "pred (AKA boolean or 1-bit integer)">;
+
+// TODO(hinsu): Use signed integers instead of signless integer which is being
+// used for legacy reasons.
+def HLO_SInt : SignlessIntOfWidths<[8, 16, 32, 64]>;
+def HLO_UInt : UnsignedIntOfWidths<[8, 16, 32, 64]>;
+def HLO_Int : AnyTypeOf<[HLO_SInt, HLO_UInt]>;
+
+def HLO_Complex : Complex<AnyTypeOf<[F32, F64]>>;
+
+// The broadcasting dimensions correspond to a tuple that describes how a
+// smaller rank shape is broadcast into a larger rank shape. For example,
+// given a 2x3x4 cuboid and a 3x4 matrix, a broadcasting tuple (1,2) means
+// matching the matrix to dimensions 1 and 2 of the cuboid.
+defvar BroadcastDimAttr = I64ElementsAttr;
+
+//===----------------------------------------------------------------------===//
+// XLA on tensors type definitions.
+//===----------------------------------------------------------------------===//
+
+// Token type.
+def HLO_Token : Type<CPred<"$_self.isa<TokenType>()">, "token">;
+
+// Any integer tensor types
+def HLO_IntTensor : TensorOf<[HLO_Int]>;
+
+// Any integer tensor type with rank 0 (i.e. representing a single integer).
+def HLO_ScalarIntTensor : ShapedContainerType<
+  [HLO_Int], And<[IsTensorTypePred, HasAnyRankOfPred<[0]>]>,
+  "a 0-dim integer tensor">;
+
+// Any floating-point tensor types
+def HLO_FpTensor : TensorOf<[AnyFloat]>;
+
+def HLO_PredTensor : TensorOf<[HLO_Pred]>;
+
+def HLO_Tensor : TensorOf<[AnyFloat, HLO_Pred, HLO_Int, HLO_Complex]>;
+
+def HLO_ComplexTensor : TensorOf<[HLO_Complex]>;
+
+def HLO_Tuple : NestedTupleOf<[HLO_Tensor, HLO_Token]>;
+
+def HLO_TensorOrTuple : AnyTypeOf<[HLO_Tensor, HLO_Tuple]>;
+
+def HLO_TensorOrTokenOrTuple : AnyTypeOf<[HLO_Tensor, HLO_Token, HLO_Tuple]>;
+
+def HLO_DimensionValue : AnyTypeOf<[Index, HLO_Pred, HLO_Int]>;
+
+// Dynamic representation of a shape vector as a tensor.
+def HLO_DimensionTensor : ShapedContainerType<
+    [HLO_DimensionValue],
+    And<[IsTensorTypePred, HasAnyRankOfPred<[1]>]>,
+    "a 1D tensor of dimensions">;
+
+// In general, static shaped tensor constraints should be avoided unless
+// it is for a legacy op which is only correct with static shapes.
+def HLO_StaticShapeTensor : StaticShapeTensorOf<[
+      AnyFloat, HLO_Pred, HLO_Int, HLO_Complex]>;
+
+//===----------------------------------------------------------------------===//
+// XLA on tensors combined type definitions.
+//===----------------------------------------------------------------------===//
+
+// Any integer or floating-point tensor types
+def HLO_IntOrFpTensor : TensorOf<[HLO_Int, AnyFloat]>;
+
+// Any integer or predicate tensor types
+def HLO_PredOrIntTensor : TensorOf<[HLO_Pred, HLO_Int]>;
+
+// Any floating-point or complex tensor types
+def HLO_FpOrComplexTensor : TensorOf<[AnyFloat, HLO_Complex]>;
+
+// Any int, floating-point or complex tensor types
+def HLO_IntFpOrComplexTensor : TensorOf<[HLO_Int, AnyFloat, HLO_Complex]>;
+
+// Any pred, int or floating-point tensor types
+def HLO_PredIntOrFpTensor : TensorOf<[HLO_Pred, HLO_Int, AnyFloat]>;
+
+//===----------------------------------------------------------------------===//
+// XLA nullary op definitions.
+//===----------------------------------------------------------------------===//
+
+class BASE_HLO_ConstOp {
+  string summary = "Constant operator";
+
+  string description = [{
+    Represents a constant value.
+  }];
+}
+
+class BASE_HLO_IotaOp {
+  string summary = "Iota operator";
+
+  string description = [{
+    Creates a rank 1 array of values starting at zero and incrementing by one.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA unary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions
+
+class BASE_HLO_AbsOp {
+  string summary = "Absolute value operator";
+
+  string description = [{
+    Returns `abs(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_CeilOp {
+  string summary = "Ceil operator";
+
+  string description = [{
+    Returns `Ceil(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_ClzOp {
+  string summary = "Count-leading-zeros (Clz) operator";
+
+  string description = [{
+    Returns the number of leading zeros in each operand element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_ConvertOp {
+  string summary = "Convert operator";
+
+  string description = [{
+    Performs element-wise conversion of values from one type to another, e.g.
+    float to int.
+
+    See https://www.tensorflow.org/xla/operation_semantics#convertelementtype.
+  }];
+}
+
+class BASE_HLO_CosOp {
+  string summary = "Cos operator";
+
+  string description = [{
+    Returns `Cos(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_ExpOp {
+  string summary = "Exponential operator";
+
+  string description = [{
+    Returns `e^(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_Expm1Op {
+  string summary = "Exponential minus one operator";
+
+  string description = [{
+    Returns `e^(operand) - 1` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_FloorOp {
+  string summary = "Floor operator";
+
+  string description = [{
+    Returns `Floor(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_GetDimensionSizeOp {
+  string summary = "GetDimensionSize operator";
+
+  string description = [{
+    Returns the size of the given dimension of the operand.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#getdimensionsize.
+  }];
+}
+
+class BASE_HLO_ImagOp {
+  string summary = "Imag operator";
+
+  string description = [{
+    Returns `Imag(operand)` element-wise.
+  }];
+}
+
+class BASE_HLO_IsFiniteOp {
+  string summary = "IsFinite operator";
+
+  string description = [{
+    Tests whether each element of operand is finite, i.e., is not positive or
+    negative infinity, and is not NaN. Returns a tensor of 1-bit integers with
+    the same shape as the input, where each element is nonzero (i.e. true) if
+    and only if the corresponding input element is finite.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_LogOp {
+  string summary = "Logarithm operator";
+
+  string description = [{
+    Returns `log(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_Log1pOp {
+  string summary = "Log1p operator";
+
+  string description = [{
+    Returns `log(operand+1)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_LogisticOp {
+  string summary = "Logistic operator";
+
+  string description = [{
+    Returns `logistic(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_NegOp {
+  string summary = "Negation operator";
+
+  string description = [{
+    Returns `-operand` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_NotOp {
+  string summary = "Not operator";
+
+  string description = [{
+    Returns `!operand` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_PopulationCountOp {
+  string summary = "PopulationCount operator";
+
+  string description = [{
+    Returns the number of bits set in each operand element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_RealOp {
+  string summary = "Real operator";
+
+  string description = [{
+    Returns `Real(operand)` element-wise.
+  }];
+}
+
+class BASE_HLO_RoundOp {
+  string summary = "Round operator";
+
+  string description = [{
+    Returns `Round(operand)` element-wise, rounding to nearest integer with
+    half-way cases rounding away from zero.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_RsqrtOp {
+  string summary = "Reciprocal Square-root operator";
+
+  string description = [{
+    Returns `1.0 / sqrt(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_SignOp {
+  string summary = "Sign operator";
+
+  string description = [{
+    Returns `sign(operand)` element-wise, where
+
+    ```
+    sign(x) = -1  : x < 0
+            = -0  : x = -0
+            = NaN : x = NaN
+            = +0  : x = +0
+            = 1   : x > 0
+    ```
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_SinOp {
+  string summary = "Sin operator";
+
+  string description = [{
+    Returns `Sin(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_SqrtOp {
+  string summary = "Square-root operator";
+
+  string description = [{
+    Returns `sqrt(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+class BASE_HLO_TanhOp {
+  string summary = "Tanh operator";
+
+  string description = [{
+    Returns `tanh(operand)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA binary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+
+class BASE_HLO_AddOp {
+  string summary = "Addition operator";
+
+  string description = [{
+    Returns `lhs + rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_ComplexOp {
+  string summary = "Complex operator";
+
+  string description = [{
+    Performs element-wise conversion of a pair of real and imaginary values to
+    a complex value.
+  }];
+}
+
+class BASE_HLO_DivOp {
+  string summary = "Division operator";
+
+  string description = [{
+    Returns `lhs / rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_MaxOp {
+  string summary = "Maximum operator";
+
+  string description = [{
+    Returns `max(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_MinOp {
+  string summary = "Minimum operator";
+
+  string description = [{
+    Returns `min(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_MulOp {
+  string summary = "Multiplication operator";
+
+  string description = [{
+    Returns `lhs * rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+class BASE_HLO_PowOp {
+  string summary = "Power operator";
+
+  string description = [{
+    Returns `lhs ^ rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_RemOp {
+  string summary = "Remainder operator";
+
+  string description = [{
+    Returns `lhs % rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_SubOp {
+  string summary = "Subtraction operator";
+
+  string description = [{
+    Returns `lhs - rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_ShiftLeftOp {
+  string summary = "Shift Left operator";
+
+  string description = [{
+    Returns `lhs << rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_ShiftRightArithmeticOp {
+  string summary = "Shift right arithmetic operator";
+
+  string description = [{
+    Returns arithmetic `lhs >> rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_ShiftRightLogicalOp {
+  string summary = "Shift right logical operator";
+
+  string description = [{
+    Returns logical `lhs >> rhs` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_Atan2Op {
+  string summary = "Atan2 operator";
+
+  string description = [{
+    Returns `atan2(lhs/rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_AndOp {
+  string summary = "Logical and";
+
+  string description = [{
+    Returns `logical_and(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_OrOp {
+  string summary = "Logical or";
+
+  string description = [{
+    Returns `logical_or(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+class BASE_HLO_XorOp {
+  string summary = "Logical xor";
+
+  string description = [{
+    Returns `logical_xor(lhs, rhs)` element-wise.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA control flow related op definitions.
+//===----------------------------------------------------------------------===//
+
+class BASE_HLO_CaseOp {
+  string summary = "Switch-Case operator";
+
+  string description = [{
+    Returns the result of executing `branches[index]`. If
+    `index` is < 0 or >= N, then `branches[N-1] is executed as
+    the default branch.
+
+    Each branch `branches[b]` must take in a single argument of same type as
+    `branch_operands[b]` and will be invoked with `branch_operands[b]`. The type
+    of the returned value of each branch must be the same.
+
+    Note that only one of the branches will be executed depending on the value
+    of index.
+    See https://www.tensorflow.org/xla/operation_semantics#conditional.
+  }];
+
+}
+
+//===----------------------------------------------------------------------===//
+// XLA parallelism related op definitions.
+//===----------------------------------------------------------------------===//
+
+// Represents a unique identifier for each Send/Recv instruction pair or
+// optionally for collective instructions (AllReduce, CollectivePermute,
+// AllToAll). Non-positive channel_id handle is equivalent to no channel id.
+class ChannelHandle<Dialect dialect> : StructAttr<"ChannelHandle", dialect, [
+                StructFieldAttr<"handle", I64Attr>,
+                StructFieldAttr<"type", I64Attr>]> {
+  let description = "two 64-bit integers 'handle' and 'type'";
+}
+
+class BASE_HLO_ReplicaIdOp {
+  string summary = "ReplicaId operator";
+
+  string description = [{
+    Returns the unique ID (int32 scalar) of the replica.
+
+    The unique ID of each replica is an unsigned integer in the interval [0, N),
+    where N is the number of replicas. Since all the replicas are running the
+    same program, a ReplicaId() call in the program will return a different
+    value on each replica.
+
+    See https://www.tensorflow.org/xla/operation_semantics#replicaid.
+  }];
+}
+
+
+class BASE_HLO_AllReduceOp {
+  string summary = "AllReduce operator";
+
+  string description = [{
+    Performs a custom reduction across replicas.
+
+    See https://www.tensorflow.org/xla/operation_semantics#allreduce.
+  }];
+}
+
+class BASE_HLO_ReduceOp {
+  string summary = "Reduce operator";
+
+  string description = [{
+    Returns the result of executing a reduction function on one or more arrays
+    in parallel.
+
+    See https://www.tensorflow.org/xla/operation_semantics#reduce.
+  }];
+}
+
+class BASE_HLO_ReduceWindowOp {
+  string summary = "ReduceWindow operator";
+
+  string description = [{
+    Returns the result of executing a reduction function over all elements in
+    each window of one or more arrays in parallel.
+
+    See https://www.tensorflow.org/xla/operation_semantics#reducewindow.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA tuple op definitions.
+//===----------------------------------------------------------------------===//
+class BASE_HLO_GetTupleElementOp {
+  string summary = "GetTupleElement operator";
+
+  string description = [{
+    Returns a member of a tuple specified by an index.
+
+    See https://www.tensorflow.org/xla/operation_semantics#gettupleelement.
+  }];
+}
+
+class BASE_HLO_TupleOp {
+   string summary = "XLA's tuple op";
+
+   string description = [{
+     Groups a set of tensor inputs into a single tuple object.
+
+     See https://www.tensorflow.org/xla/operation_semantics#tuple.
+   }];
+}
+
+//===----------------------------------------------------------------------===//
+// Precision Config enum definitions.
+//===----------------------------------------------------------------------===//
+
+// These mirror the XLA PrecisionConfig proto enum.
+def HLO_PRECISION_DEFAULT : StrEnumAttrCase<"DEFAULT">;
+def HLO_PRECISION_HIGH    : StrEnumAttrCase<"HIGH">;
+def HLO_PRECISION_HIGHEST : StrEnumAttrCase<"HIGHEST">;
+
+def HLO_PrecisionAttr : StrEnumAttr<"Precision",
+    "XLA precision for an operand. Has backend specific meaning.",
+    [HLO_PRECISION_DEFAULT,  HLO_PRECISION_HIGH, HLO_PRECISION_HIGHEST]>;
+
+// TODO(b/129153247) See if it's possible to also validate the size.
+def HLO_PrecisionConfigAttr:
+    OptionalAttr<
+          TypedArrayAttrBase<HLO_PrecisionAttr, "Precision Config attribute">>;
+
+//===----------------------------------------------------------------------===//
+// Fast Fourier Transform Type enum definitions.
+//===----------------------------------------------------------------------===//
+
+// These mirror the XLA FftType proto enum.
+def HLO_FFT_TYPE_FFT : StrEnumAttrCase<"FFT">;
+def HLO_FFT_TYPE_IFFT : StrEnumAttrCase<"IFFT">;
+def HLO_FFT_TYPE_RFFT : StrEnumAttrCase<"RFFT">;
+def HLO_FFT_TYPE_IRFFT : StrEnumAttrCase<"IRFFT">;
+
+def HLO_FftTypeAttr : StrEnumAttr<"FftType",
+    "XLA fast fourier transform type.",
+    [HLO_FFT_TYPE_FFT, HLO_FFT_TYPE_IFFT,
+     HLO_FFT_TYPE_RFFT, HLO_FFT_TYPE_IRFFT]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison op definitions.
+//===----------------------------------------------------------------------===//
+
+// These mirror the XLA ComparisonDirection enum.
+def HLO_COMPARISON_DIRECTION_EQ : StrEnumAttrCase<"EQ">;
+def HLO_COMPARISON_DIRECTION_NE : StrEnumAttrCase<"NE">;
+def HLO_COMPARISON_DIRECTION_GE : StrEnumAttrCase<"GE">;
+def HLO_COMPARISON_DIRECTION_GT : StrEnumAttrCase<"GT">;
+def HLO_COMPARISON_DIRECTION_LE : StrEnumAttrCase<"LE">;
+def HLO_COMPARISON_DIRECTION_LT : StrEnumAttrCase<"LT">;
+
+def HLO_ComparisonDirectionAttr : StrEnumAttr<"ComparisonDirection",
+    "Which comparison operation to perform.",
+    [
+      HLO_COMPARISON_DIRECTION_EQ,
+      HLO_COMPARISON_DIRECTION_NE,
+      HLO_COMPARISON_DIRECTION_GE,
+      HLO_COMPARISON_DIRECTION_GT,
+      HLO_COMPARISON_DIRECTION_LE,
+      HLO_COMPARISON_DIRECTION_LT
+    ]>;
+
+class BASE_HLO_CompareOp {
+  string summary = "Comparison operator";
+
+  string description = [{
+    Compares `lhs` and `rhs` elementwise according to `comparison_direction`.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#element-wise_comparison_operations.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// Quantize op definitions.
+//===----------------------------------------------------------------------===//
+
+// These mirror the XLA ComparisonDirection enum.
+def HLO_MIN_COMBINED : StrEnumAttrCase<"MIN_COMBINED">;
+
+def HLO_DequantizeModeAttr : StrEnumAttr<"DequantizeMode",
+  "Dequantization mode. Only MIN_COMBINED is supported.",
+  [HLO_MIN_COMBINED]>;
+
+class BASE_HLO_DequantizeOp {
+  string summary = "Dequantize operator";
+
+  string description = [{
+    Dequantize the quantized input of packed uint32 to bfloat16. Only uint8 or
+    uint16 is supported for the original unpacked input.
+
+    Returns a tensor of shape [d0,..., dn * unpack_size] if unpacked input shape
+    is [d0, ..., dn], where unpack_size = sizeof(unit32) / sizeof(T), where T is
+    the unpacked input type. If transpose_output is true, will return a tensor
+    of shape [dn * unpack_size, dn-1, ..., d1, d0]. transpose_output is faster
+    when input's rank higher than 1. The input needs to be transposed to use
+    transpose_output feature.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Slice definitions.
+//===----------------------------------------------------------------------===//
+
+class BASE_HLO_SliceOp {
+  string summary = "Slice operator";
+
+  string description = [{
+    Slices a portion of the `operand` into a new configuration.
+
+    See https://www.tensorflow.org/xla/operation_semantics#slice.
+  }];
+}
+
+class BASE_HLO_DynamicSliceOp {
+  string summary = "Dynamic Slice operator";
+
+  string description = [{
+    Extracts a sub-array from the input array at dynamic start_indices.
+
+    See https://www.tensorflow.org/xla/operation_semantics#dynamicslice.
+  }];
+}
+
+class BASE_HLO_DynamicUpdateSliceOp {
+  string summary = "Dynamic Update Slice operator";
+
+  string description = [{
+    DynamicUpdateSlice generates a result which is the value of the input array
+    operand, with a slice update overwritten at start_indices.
+
+    See https://www.tensorflow.org/xla/operation_semantics#dynamicupdateslice.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Other op definitions.
+//===----------------------------------------------------------------------===//
+
+class BASE_HLO_AllToAllOp {
+  string summary = "AllToAll";
+
+  string description = [{
+    AllToAll is a collective operation that sends data from all cores to all
+    cores. It has two phases:
+    - The scatter phase. On each core, the operand is split into `split_count`
+      number of blocks along the `split_dimension`, and the blocks are
+      scattered to all cores, e.g., the i-th block is sent to the i-th core.
+    - The gather phase. Each core concatenates the received blocks along the
+      `concat_dimension`.
+
+    The participating cores can be configured by:
+    - replica_groups: each ReplicaGroup contains a list of replica id
+      participating in the computation (replica id for the current replica can
+      be retrieved using ReplicaId op). AllToAll will be applied within
+      subgroups in the specified order. For example,
+      `replica_groups` = {{1,2,3}, {4,5,0}} means that an AllToAll will be applied
+      within replicas {1, 2, 3}, and in the gather phase, the received blocks
+      will be concatenated in the same order of 1, 2, 3. Then, another AllToAll
+      will be applied within replicas 4, 5, 0, and the concatenation order is
+      also 4, 5, 0. If `replica_groups` is empty, all replicas belong to one
+      group, and the concatenation order is the numerical order (0, 1, 2, ...).
+
+    Prerequisites:
+    - The dimension size of the operand on the split_dimension is divisible by
+      `split_count`.
+    - The operand's shape is not tuple.
+
+    See https://www.tensorflow.org/xla/operation_semantics#alltoall
+  }];
+}
+
+class BASE_HLO_BatchNormGradOp {
+  string summary = "Batch Normalization Gradient";
+
+  string description = [{
+    Calculates gradients of batch norm.
+
+    See https://www.tensorflow.org/xla/operation_semantics#batchnormgrad
+  }];
+}
+
+class BASE_HLO_BatchNormInferenceOp {
+  string summary = "Batch Normalization for Inference";
+
+  string description = [{
+    Normalizes an array across batch and spatial dimensions.
+
+    See https://www.tensorflow.org/xla/operation_semantics#batchnorminference
+  }];
+}
+
+class BASE_HLO_BatchNormTrainingOp {
+  string summary = "Batch Normalization for Training";
+
+  string description = [{
+    Normalizes an array across batch and spatial dimensions.
+
+    See https://www.tensorflow.org/xla/operation_semantics#batchnormtraining
+  }];
+}
+
+class BASE_HLO_BitcastConvertOp {
+  string summary = "BitcastConvert operator";
+
+  string description = [{
+    Similar to a 'tf.bitcast' in TensorFlow, performs an element-wise bitcast
+    operation from a data shape to a target shape. The dimensions must match,
+    and the conversion is an element-wise one. Bitcast is implemented as a
+    low-level cast, so machines with different floating-point representations
+    will give different results.
+
+    See https://www.tensorflow.org/xla/operation_semantics#bitcastconverttype.
+  }];
+}
+
+class BASE_HLO_BroadcastOp  {
+  string summary = "Broadcast a tensor to a higher rank by prepending dimensions";
+
+  string description = [{
+    Broadcasts the operand tensor to a higher rank by prepending
+    `broadcast_sizes` to the dimensions. The current values of the operand are
+    copied into the other dimensions.
+
+    This is a more limited form of broadcasting, that corresponds to the XLA
+    client Broadcast method. For a more general form of broadcasting, see the
+    BroadcastInDimOp.
+
+    See https://www.tensorflow.org/xla/operation_semantics#broadcast.
+  }];
+}
+
+class BASE_HLO_BroadcastInDimOp  {
+  string summary = "Broadcast a tensor into the given shape by adding dimensions.";
+
+  string description = [{
+    Broadcasts the `operand` tensor to a higher rank. This is not the limited
+    form of broadcasting exposed as the XLA client broadcast op, but rather the
+    more powerful "InDim" broadcasting, which is closer to the HLO broadcast op
+    and exposed in the XLA client BroadcastInDim method.
+
+    `broadcast_dimensions` maps the operand dimension number to the target shape
+    dimension number. It must have the same size as the rank of the operand. The
+    mapped dimensions must either be the same size or the dimension being
+    broadcast from must be size 1 (degenerate broadcasting).
+
+    For a scalar (0D tensor) operand, `broadcast_dimensions` must be empty. The
+    The scalar value will be broadcast to every element in the target shape.
+
+    See https://www.tensorflow.org/xla/broadcasting.
+  }];
+}
+
+class BASE_HLO_CholeskyOp {
+  string summary = "Cholesky operator";
+
+  string description = [{
+  Computes the Cholesky decomposition of a batch of symmetric (Hermitian)
+  positive definite matrices.
+
+  If lower is true, computes lower-triangular matrices l such that
+  `a=l.Transpose(l)`. If lower is false, computes upper-triangular matrices u such
+  that `a=Transpose(u).u`.
+
+  Input data is read only from the lower/upper triangle of a, depending on the
+  value of lower. Values from the other triangle are ignored. Output data is
+  returned in the same triangle; the values in the other triangle are
+  implementation-defined and may be anything.
+
+  If the rank of a is greater than 2, a is treated as a batch of matrices, where
+  all except the minor 2 dimensions are batch dimensions.
+
+  If a is not symmetric (Hermitian) positive definite, the result is
+  implementation-defined.
+
+    See https://www.tensorflow.org/xla/operation_semantics#cholesky.
+  }];
+}
+
+class BASE_HLO_ClampOp  {
+  string summary = "Clamp operator";
+
+  string description = [{
+    Clamps an operand to within the range between a minimum and maximum value.
+
+    Note: All three arrays must be the same shape. Alternatively, as a
+          restricted form of broadcasting, min and/or max can be a scalar (0D
+          tensor) of the element type of the tensor operand.
+
+    See https://www.tensorflow.org/xla/operation_semantics#clamp.
+  }];
+}
+
+class BASE_HLO_CollectivePermuteOp {
+  string summary = "CollectivePermute operator";
+
+  string description = [{
+    CollectivePermute is a collective operation that sends and receives data
+    cross replicas.
+    Note that there are the following restrictions on the source_target_pair:
+    - Any two pairs should not have the same target replica id, and they should
+    not have the same source replica id.
+    - If a replica id is not a target in any pair, then the output on that
+    replica is a tensor consists of 0(s) with the same shape as the input.
+
+    See https://www.tensorflow.org/xla/operation_semantics#collectivepermute.
+
+  }];
+}
+class BASE_HLO_ConcatenateOp {
+   string summary = "XLA's concatenate op";
+
+   string description = [{
+     Concatenates a set of tensors along the specified dimension.
+
+     See https://www.tensorflow.org/xla/operation_semantics#concatenate.
+   }];
+}
+
+class BASE_HLO_ConvOp {
+  string summary = "Convolution operator";
+
+  string description = [{
+    Computes a convolution of the kind used in neural networks.
+
+    See https://www.tensorflow.org/xla/operation_semantics#conv_convolution.
+  }];
+}
+
+class BASE_HLO_CopyOp {
+  string summary = "Copy operator";
+
+  string description = [{
+    Returns a copy of `operand`.
+  }];
+}
+
+class BASE_HLO_CrossReplicaSumOp {
+   string summary = "Sums input across replicated instances.";
+
+   string description = [{
+     For each of the replica groups, operands of the group devices are summed
+     so that each device has the sum.
+
+     For example, suppose there are 8 TPU devices: `[A, B, C, D, E, F, G, H]`.
+     Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
+     and `B, D, F, H` as group 1. Thus we get the outputs:
+     `[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
+
+     See https://www.tensorflow.org/xla/operation_semantics#crossreplicasum.
+   }];
+}
+
+
+class BASE_HLO_CustomCallOp {
+  string summary = "CustomCall operator";
+
+  string description = [{
+    A custom call invokes code external to XLA. The `args` are passed to the
+    external code, and the external code is expected to produce a result of the
+    given type. The exact mechanism is backend-specific. For example, in the CPU
+    backend, a call instruction is emitted which targets a symbol with the name
+    `call_target_name`.
+
+    `call_target_name` and `backend_config` can be arbitrary strings, but
+    `call_target_name` should be short as it may be used in labels.
+    `backend_config` can encode arbitrarily large amounts of information.
+
+    See https://www.tensorflow.org/xla/operation_semantics#customcall.
+  }];
+}
+
+class BASE_HLO_DotOp {
+  string summary = "Dot operator";
+  string description = [{
+    Performs dot products between vectors, vector/matrix and matrix/matrix
+    multiplication.
+
+    See https://www.tensorflow.org/xla/operation_semantics#dot.
+  }];
+}
+
+class BASE_HLO_DotGeneralOp {
+  string summary = "General Dot operator";
+  string description = [{
+    Performs general dot products between vectors, vector/matrix and
+    matrix/matrix multiplication.
+
+    See https://www.tensorflow.org/xla/operation_semantics#dotgeneral.
+  }];
+}
+
+class BASE_HLO_FftOp {
+  string summary = "Fast fourier transform operator";
+
+  string description = [{
+    Returns the fast-fourier-transform of the input array.
+
+    See
+    https://www.tensorflow.org/xla/operation_semantics#fft.
+  }];
+}
+
+class BASE_HLO_GatherOp{
+  string summary = "Gather operator";
+
+  string description = [{
+    Stitches together several slices of an input array.
+
+    See https://www.tensorflow.org/xla/operation_semantics#gather.
+  }];
+}
+
+class BASE_HLO_MapOp {
+  string summary = "Map operator";
+
+  string description = [{
+  Applies a scalar function over the given operands arrays, producing an array
+  of the same dimensions where each element is the result of the mapped function
+  applied to the corresponding elements in the input arrays.
+
+  The mapped function is an arbitrary computation with the restriction that it
+  has N inputs of scalar type T and a single output with type S. The output has
+  the same dimensions as the operands except that the element type T is replaced
+  with S.
+
+  See https://www.tensorflow.org/xla/operation_semantics#map.
+  }];
+}
+
+class BASE_HLO_ReshapeOp {
+  string summary = "Reshape operator";
+
+  string description = [{
+    Reshapes the dimensions of `operand` into a new configuration.
+
+    See https://www.tensorflow.org/xla/operation_semantics#reshape.
+  }];
+}
+
+class ScatterDimensionNumbers<Dialect dialect> : StructAttr<
+    "ScatterDimensionNumbers", dialect, [
+      StructFieldAttr<"update_window_dims", I64ElementsAttr>,
+      StructFieldAttr<"inserted_window_dims", I64ElementsAttr>,
+      StructFieldAttr<"scatter_dims_to_operand_dims", I64ElementsAttr>,
+      StructFieldAttr<"index_vector_dim", I64Attr>]> {
+  let description = "Structure of dimension information for scatter";
+}
+
+class BASE_HLO_ScatterOp {
+  string summary = "Scatter operator";
+
+  string description = [{
+    Generates a result which is the value of the input array `operand`,
+    with several slices (at indices specified by `scatter_indices`)
+    updated with the values in `updates` using `update_computation`.
+
+    See https://www.tensorflow.org/xla/operation_semantics#scatter.
+  }];
+}
+
+class BASE_HLO_SelectOp {
+  string summary = "Select operator";
+
+  string description = [{
+    Constructs an output tensor from the elements of `on_true` and `on_false`
+    based on the values of `pred`.
+
+    `pred`, `on_true` and `on_false` must be broadcast compatible.
+  }];
+}
+
+class BASE_HLO_SelectAndScatterOp {
+  string summary = "SelectAndScatter operator";
+
+  string description = [{
+    Runs a windowed selection `select` function over `operand` with shape
+    `window_dimensions` and stride `window_strides`. This will produce an amount
+    of selected locations whose shape matches `source`. These are then scattered
+    to the output which is initialized with `init_value`.
+    Multiple scattered elements which land in the same output location are
+    combined using the `scatter` function.
+
+    See https://www.tensorflow.org/xla/operation_semantics#selectandscatter.
+  }];
+}
+
+class BASE_HLO_SetDimensionSizeOp {
+  string summary = "SetDimensionSize operator";
+
+  string description = [{
+    Sets the dynamic size of operand's given dimension. Pass through the operand
+    as result, with dynamic dimension tracked by the compiler. Padded values
+    will be ignored by downstream reduction ops.
+
+    See https://www.tensorflow.org/xla/operation_semantics#setdimensionsize.
+  }];
+}
+
+class BASE_HLO_SortOp {
+  string summary = "Sort operator";
+
+  string description = [{
+    Sorts the given `operands` at the given `dimension` with the given
+    `comparator`.
+
+    See https://www.tensorflow.org/xla/operation_semantics#sort.
+  }];
+}
+
+class BASE_HLO_ReverseOp {
+  string summary = "Reverse operator";
+
+  string description = [{
+    Reverses the specified dimensions of `operand` according to the given
+    `dimensions`.
+
+    See https://www.tensorflow.org/xla/operation_semantics#rev_reverse.
+  }];
+}
+
+class BASE_HLO_PadOp {
+  string summary = "Pad operator";
+
+  string description = [{
+    Pads the edges of `operand` with the `padding_value` and according to
+    the passed configuration.
+
+    See https://www.tensorflow.org/xla/operation_semantics#pad.
+  }];
+}
+
+class BASE_HLO_TraceOp {
+  string summary = "Trace operator";
+
+  string description = [{
+    Emits a logging message `tag` with the `operand`.
+  }];
+}
+
+class BASE_HLO_TransposeOp {
+  string summary = "Transpose operator";
+
+  string description = [{
+    Permutes the dimensions of `operand` according to the given `permutation`.
+
+    `res_dimensions[i] = operand_dimensions[permutation[i]]`
+
+    See https://www.tensorflow.org/xla/operation_semantics#transpose.
+  }];
+}
+
+// These mirror the XLA Transpose enum in Triangular Solve options.
+def HLO_TRANSPOSE_INVALID : StrEnumAttrCase<"TRANSPOSE_INVALID">;
+def HLO_NO_TRANSPOSE : StrEnumAttrCase<"NO_TRANSPOSE">;
+def HLO_TRANSPOSE : StrEnumAttrCase<"TRANSPOSE">;
+def HLO_ADJOINT : StrEnumAttrCase<"ADJOINT">;
+
+def HLO_TransposeAttr : StrEnumAttr<"Transpose",
+    "Transpose options",
+    [
+      HLO_TRANSPOSE_INVALID,
+      HLO_NO_TRANSPOSE,
+      HLO_TRANSPOSE,
+      HLO_ADJOINT
+    ]>;
+
+class BASE_HLO_TriangularSolveOp {
+  string summary = "TriangularSolve operator";
+
+  string description = [{
+    Solves systems of linear equations with lower or upper triangular
+    coefficient matrices by forward- or back-substitution. Broadcasting along
+    leading dimensions, this routine solves one of the matrix systems
+    op(a) * x = b, or x * op(a) = b, for the variable x, given a and b, where
+    op(a) is either op(a) = a, or op(a) = Transpose(a), or
+    op(a) = Conj(Transpose(a)).
+
+    Input data is read only from the lower/upper triangle of a, depending on the
+    value of lower. Values from the other triangle are ignored. Output data is
+    returned in the same triangle; the values in the other triangle are
+    implementation-defined and may be anything.
+
+    If the rank of a and b are greater than 2, they are treated as batches of
+    matrices, where all except the minor 2 dimensions are batch dimensions. a
+    and b must have equal batch dimensions.
+
+    See https://www.tensorflow.org/xla/operation_semantics#triangularsolve.
+  }];
+
+}
+
+class BASE_HLO_RngUniformOp {
+  string summary = "RNG with uniform distribution.";
+
+  string description = [{
+    Constructs an output of a given shape with random numbers generated
+    following the uniform distribution over the interval `[a,b)`. The parameters
+    and output element type have to be a boolean type, an integral type or a
+    floating point types, and the types have to be consistent.
+
+    See https://www.tensorflow.org/xla/operation_semantics#rnguniform.
+  }];
+}
+
+class BASE_HLO_RngNormalOp {
+  string summary = "RNG with normal distribution.";
+
+  string description = [{
+    Constructs an output of a given shape with random numbers generated
+    following the normal distribution with parameters `mu` and `sigma`. The
+    parameters and output shape have to have a floating point elemental type.
+    The parameters furthermore have to be scalar valued.
+
+    See https://www.tensorflow.org/xla/operation_semantics#rngnormal.
+  }];
+}
+
+class BASE_HLO_ReducePrecisionOp {
+  string summary = "Reduce precision operator";
+
+  string description = [{
+    Models the effect of converting floating - point values to a lower -
+    precision format(such as IEEE - FP16) and back to the original
+    format. The number of exponent and mantissa bits in the lower -
+    precision format can be specified arbitrarily,
+    although all bit sizes may not be supported on all hardware
+    implementations.
+
+    See https://www.tensorflow.org/xla/operation_semantics#reduceprecision.
+  }];
+}
+
+class BASE_HLO_InfeedOp {
+  string summary = "Infeed operator";
+
+  string description = [{
+    Reads a single data item from the implicit Infeed streaming interface of
+    the device, interpreting the data as the given shape and its layout, and
+    returns an LHLO op of the data. Multiple Infeed operations are allowed in a
+    computation, but there must be a total order among the Infeed operations.
+    For example, two Infeeds in the code below have a total order since there
+    is a dependency between the while loops.
+
+    See https://www.tensorflow.org/xla/operation_semantics#infeed
+  }];
+}
+
+class BASE_HLO_WhileOp {
+  string summary = "While operator";
+
+  string description = [{
+    Returns the result of executing a body function until the cond body returns
+    true.
+
+    See https://www.tensorflow.org/xla/operation_semantics#while.
+  }];
+}
+
+#endif // HLO_OPS_BASE
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td b/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td
new file mode 100644
index 0000000..fdb301d
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td
@@ -0,0 +1,43 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the utils file for the HLO dialect.
+
+#ifndef HLO_UTILS
+#define HLO_UTILS
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+
+def NullArrayAttr : NativeCodeCall<"ArrayAttr()">;
+
+def CastIntElementsAttr : NativeCodeCall<"$0.cast<DenseIntElementsAttr>()">;
+
+class ConstantSplat<string value> : NativeCodeCall<
+    "xla::getSplat(&$_builder, $0, " # value # ")">;
+
+def NullDenseIntElementsAttr : NativeCodeCall<"DenseIntElementsAttr()">;
+
+def BinBroadcastDimensions : NativeCodeCall<
+    "xla::getBroadcastDimensionsAttr(&$_builder, $0, $1)">;
+
+def BinBroadcastDimensionsNonEmpty : NativeCodeCall<
+    "xla::getBroadcastDimensionsAttr(&$_builder, $0, $1, /*allow_empty=*/false)">;
+
+// Here, the element type can be any integer or float type. But, note that only
+// 32 bit integers are supported for the value.
+class GetScalarOfType<int value> : NativeCodeCall<
+  "xla::GetScalarOfType(getElementTypeOrSelf($0)," # value # ")">;
+
+#endif // HLO_UTILS
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h b/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h
new file mode 100644
index 0000000..412711b
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h
@@ -0,0 +1,28 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_
+
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/StandardTypes.h"
+
+namespace mlir {
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc"
+
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td b/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td
new file mode 100644
index 0000000..017a185
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td
@@ -0,0 +1,161 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file contains inferFusiblityOpInterface, which is used to guide
+// fusion decision.
+
+#ifndef MLIR_INFER_FUSIBILITY_OP_INTERFACE
+#define MLIR_INFER_FUSIBILITY_OP_INTERFACE
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+
+// OpInterface to query if an op is fusible and to query the shape equality
+// constraint among the inputs and outputs of an op.
+def InferFusibilityOpInterface : OpInterface<"InferFusibilityOpInterface"> {
+  let description = [{
+    Interface to query if an op is fusible and to query the shape equality
+    constraint among the inputs and outputs of an op.
+  }];
+
+  let methods = [
+    InterfaceMethod<
+      /*desc=*/[{If true, this op can be fused with its operands
+      }],
+      /*retTy=*/"bool",
+      /*methodName=*/"isFusibleWithOperand",
+      /*args=*/(ins),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Returns whether this op can be fused with its operands
+        return true;
+      }]
+    >,
+    InterfaceMethod<
+      /*desc=*/[{If true, this op can be fused with its consumers
+      }],
+      /*retTy=*/"bool",
+      /*methodName=*/"isFusibleWithConsumer",
+      /*args=*/(ins),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Return whether this op can be fused withh its consumers
+        return true;
+      }]
+    >,
+    InterfaceMethod<
+      /*desc=*/"Return whether two inputs have the same shape (assuming no"
+               "implicit broadcasting).",
+      /*retTy=*/"bool",
+      /*methodName=*/"inferInputsShapeEquality",
+      /*args=*/(ins "int":$lhs, "int":$rhs),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Return whether two inputs have the same shape.
+        Operation *op = this->getOperation();
+        assert(lhs < op->getNumOperands() && lhs >= 0 &&
+               rhs < op->getNumOperands() && rhs >= 0);
+        if (lhs == rhs) return true;
+
+        // if both lhs and rhs have static shapes, check them directly
+        Type lhs_ty = op->getOperand(lhs).getType();
+        Type rhs_ty = op->getOperand(rhs).getType();
+        auto lhs_shape_type = lhs_ty.dyn_cast_or_null<RankedTensorType>();
+        auto rhs_shape_type = rhs_ty.dyn_cast_or_null<RankedTensorType>();
+        if (!lhs_shape_type || !lhs_shape_type.hasStaticShape() ||
+            !rhs_shape_type || !rhs_shape_type.hasStaticShape() ||
+            lhs_shape_type.getRank() != rhs_shape_type.getRank()) {
+          return false;
+        }
+        return lhs_shape_type.getShape() == rhs_shape_type.getShape();
+      }]
+    >,
+    InterfaceMethod<
+      /*desc=*/"Return whether two outputs have the same shape (assuming no"
+               " implicit broadcasting).",
+      /*retTy=*/"bool",
+      /*methodName=*/"inferOutputsShapeEquality",
+      /*args=*/(ins "int":$lhs, "int":$rhs),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Return whether two outputs have the same shape.
+        Operation *op = this->getOperation();
+        assert(lhs < op->getNumResults() && lhs >= 0 &&
+               rhs < op->getNumResults() && rhs >= 0);
+        if (lhs == rhs) return true;
+
+        // if both lhs and rhs have static shapes, check them directly
+        Type lhs_ty = op->getResult(lhs).getType();
+        Type rhs_ty = op->getResult(rhs).getType();
+        auto lhs_shape_type = lhs_ty.dyn_cast_or_null<RankedTensorType>();
+        auto rhs_shape_type = rhs_ty.dyn_cast_or_null<RankedTensorType>();
+        if (!lhs_shape_type || !lhs_shape_type.hasStaticShape() ||
+            !rhs_shape_type || !rhs_shape_type.hasStaticShape() ||
+            lhs_shape_type.getRank() != rhs_shape_type.getRank()) {
+          return false;
+        }
+        return lhs_shape_type.getShape() == rhs_shape_type.getShape();
+      }]
+    >,
+    InterfaceMethod<
+      /*desc=*/"Return whether the input and the output have the same"
+               " shape (assuming no implicit broadcasting).",
+      /*retTy=*/"bool",
+      /*methodName=*/"inferInputOutputShapeEquality",
+      /*args=*/(ins "int":$input, "int":$output),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Return whether the input and the output have the same shape.
+        Operation *op = this->getOperation();
+        assert(input < op->getNumOperands() && input >= 0 &&
+               output < op->getNumResults() && output >= 0);
+
+        // if both input and output have static shapes, check them directly
+        Type input_ty = op->getOperand(input).getType();
+        Type output_ty = op->getResult(output).getType();
+        auto input_shape_type = input_ty.dyn_cast_or_null<RankedTensorType>();
+        auto output_shape_type = output_ty.dyn_cast_or_null<RankedTensorType>();
+        if (!input_shape_type || !input_shape_type.hasStaticShape() ||
+            !output_shape_type || !output_shape_type.hasStaticShape() ||
+            input_shape_type.getRank() != output_shape_type.getRank()) {
+          return false;
+        }
+        return input_shape_type.getShape() == output_shape_type.getShape();
+      }]
+    >,
+    InterfaceMethod<
+      /*desc=*/[{Return the effective workload shape for the operation.
+
+      Here the effective workload shape roughly represents the maximum
+      parallelism can be used during the codegen stage. It's used to check
+      the shape-compatibility of the operation. During fusion, we only
+      try to fuse shape-compatible ops for performace.
+      For example, the effective workload shape of an elementwise op is its
+      output shape, while the effective workload shape of a reduction op may
+      be its operand shape.
+      Return None if such an inference is not possible.
+      }],
+      /*retTy=*/"llvm::Optional<Value>",
+      /*methodName=*/"inferEffectiveWorkloadShape",
+      /*args=*/(ins),
+      /*methodBody=*/[{}],
+      /*defaultImplementation=*/[{
+        /// Return effective workload size if possible, otherwise None.
+        return {};
+      }]
+    >,
+  ];
+}
+
+#endif
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h b/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h
new file mode 100644
index 0000000..554252a
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h
@@ -0,0 +1,52 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines the operations used in the LXLA dialect.
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_LHLO_OPS_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_LHLO_OPS_H_
+
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Dialect.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpDefinition.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Types.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/ViewLikeInterface.h"
+
+namespace mlir {
+class OpBuilder;
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.h.inc"
+
+namespace xla_lhlo {
+
+class XlaLhloDialect : public Dialect {
+ public:
+  explicit XlaLhloDialect(MLIRContext *context);
+  static StringRef getDialectNamespace() { return "xla_lhlo"; }
+};
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc"
+
+}  // namespace xla_lhlo
+}  // end namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_IR_LHLO_OPS_H_
diff --git a/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td b/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td
new file mode 100644
index 0000000..4e4235d
--- /dev/null
+++ b/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td
@@ -0,0 +1,796 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the operation definition file for LXLA.
+//
+// This file largely overlaps with hlo_ops.td at a logic level. It's tempting to
+// merge these two files together, but we need to consider the following
+// obstacles:
+// * We need to have a common representation for arguments. That is to say,
+// HLO_Array<X> translates to HLO_Tensor<X> in HLO dialect, and
+// Arg<LHLO_Buffer<X>, "", [Mem(Read|Write)]> in LHLO. Array types within tuples
+// also need to be transformed.
+// * As of now, TableGen's dag functions are not sufficient to accomplish the
+// one above.
+// * Traits aren't identical, but need to be coped. For example,
+// SameOperandAndResultType in HLO corresponds to SameTypeOperands in LHLO.
+// * Also, currently HLO describes the API in XLA's client side, not service
+// side. LHLO aims for the service side.
+
+#ifndef LHLO_OPS
+#define LHLO_OPS
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/SideEffectInterfaces.td"
+include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/ViewLikeInterface.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td"
+
+def LHLO_Dialect : Dialect {
+  let name = "xla_lhlo";
+  let cppNamespace = "xla_lhlo";
+}
+
+//===----------------------------------------------------------------------===//
+// XLA type definitions.
+//===----------------------------------------------------------------------===//
+
+// Any integer tensor types
+def LHLO_IntBuffer : MemRefOf<[HLO_Int]>;
+
+// Any floating-point tensor types
+def LHLO_FpBuffer : MemRefOf<[AnyFloat]>;
+
+def LHLO_ComplexBuffer : MemRefOf<[AnyComplex]>;
+
+def LHLO_FpOrComplexBuffer : MemRefOf<[AnyFloat, AnyComplex]>;
+
+def LHLO_PredBuffer : MemRefOf<[HLO_Pred]>;
+
+// Any integer or floating-point tensor types
+def LHLO_IntOrFpBuffer : MemRefOf<[HLO_Int, AnyFloat]>;
+
+def LHLO_PredOrIntBuffer : MemRefOf<[HLO_Int, HLO_Pred]>;
+
+def LHLO_Buffer : MemRefOf<[AnyFloat, AnySignlessInteger, AnyComplex]>;
+
+//===----------------------------------------------------------------------===//
+// XLA nullary op definitions.
+//===----------------------------------------------------------------------===//
+
+class LHLO_Op<string mnemonic, list<OpTrait> traits> :
+  Op<LHLO_Dialect, mnemonic,
+    !listconcat([MemoryEffects<[MemRead, MemWrite]>], traits)>;
+
+def LHLO_ConstOp : LHLO_Op<"constant", []>, BASE_HLO_ConstOp {
+  let arguments = (ins
+    ElementsAttr:$value,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_IotaOp : LHLO_Op<"iota", []>, BASE_HLO_IotaOp {
+  let arguments = (ins I64Attr:$iota_dimension,
+                   Arg<LHLO_Buffer, "", [MemWrite]>:$output);
+}
+
+//===----------------------------------------------------------------------===//
+// XLA unary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions
+
+class LHLO_UnaryElementwiseOp<string mnemonic,
+                              Type BufferType = LHLO_Buffer,
+                              list<OpTrait> traits = [SameTypeOperands]>
+    : LHLO_Op<mnemonic, traits> {
+  let arguments = (ins Arg<BufferType, "", [MemRead]>:$input,
+                       Arg<BufferType, "", [MemWrite]>:$output);
+}
+
+def LHLO_AbsOp: LHLO_UnaryElementwiseOp<"abs">, BASE_HLO_AbsOp;
+
+// TODO(timshen): add a custom verifier.
+def LHLO_BitcastConvertOp:
+    LHLO_UnaryElementwiseOp<"bitcast_convert", LHLO_Buffer, [SameOperandsShape]>, BASE_HLO_BitcastConvertOp;
+
+def LHLO_CeilOp: LHLO_UnaryElementwiseOp<"ceil", LHLO_FpBuffer>, BASE_HLO_CeilOp;
+
+def LHLO_ClzOp: LHLO_UnaryElementwiseOp<"count_leading_zeros", LHLO_IntBuffer>, BASE_HLO_ClzOp;
+
+// TODO(timshen): add a custom verifier.
+def LHLO_ConvertOp : LHLO_UnaryElementwiseOp<"convert", LHLO_Buffer, [SameOperandsShape]>, BASE_HLO_ConvertOp;
+
+def LHLO_CosOp: LHLO_UnaryElementwiseOp<"cosine", LHLO_FpOrComplexBuffer>, BASE_HLO_CosOp;
+
+def LHLO_ExpOp: LHLO_UnaryElementwiseOp<"exponential", LHLO_FpOrComplexBuffer>, BASE_HLO_ExpOp;
+
+def LHLO_Expm1Op: LHLO_UnaryElementwiseOp<"exponential_minus_one", LHLO_FpOrComplexBuffer>, BASE_HLO_Expm1Op;
+
+def LHLO_FloorOp: LHLO_UnaryElementwiseOp<"floor", LHLO_FpBuffer>, BASE_HLO_FloorOp;
+
+def LHLO_ImagOp: LHLO_Op<"imag", [SameOperandsShape]>, BASE_HLO_ImagOp {
+  let arguments = (ins Arg<LHLO_ComplexBuffer, "", [MemRead]>:$input,
+                       Arg<LHLO_FpBuffer, "", [MemWrite]>:$output);
+}
+
+def LHLO_IsFiniteOp: LHLO_Op<"is_finite", [SameOperandsShape]>, BASE_HLO_IsFiniteOp {
+  let arguments = (ins Arg<LHLO_FpBuffer, "", [MemRead]>:$input,
+                       Arg<LHLO_PredBuffer, "", [MemWrite]>:$output);
+}
+
+def LHLO_LogOp: LHLO_UnaryElementwiseOp<"log", LHLO_FpOrComplexBuffer>, BASE_HLO_LogOp;
+
+def LHLO_Log1pOp: LHLO_UnaryElementwiseOp<"log_plus_one", LHLO_FpOrComplexBuffer>, BASE_HLO_Log1pOp;
+
+def LHLO_NegOp: LHLO_UnaryElementwiseOp<"negate">, BASE_HLO_NegOp;
+
+def LHLO_NotOp: LHLO_UnaryElementwiseOp<"not", LHLO_PredOrIntBuffer>, BASE_HLO_NotOp;
+
+def LHLO_PopulationCountOp: LHLO_UnaryElementwiseOp<"popcnt", LHLO_IntBuffer>, BASE_HLO_PopulationCountOp;
+
+def LHLO_RealOp: LHLO_Op<"real", [SameOperandsShape]>, BASE_HLO_RealOp {
+  let arguments = (ins Arg<LHLO_ComplexBuffer, "", [MemRead]>:$input,
+                       Arg<LHLO_FpBuffer, "", [MemWrite]>:$output);
+}
+
+def LHLO_RoundOp: LHLO_UnaryElementwiseOp<"round_nearest_afz", LHLO_FpBuffer>, BASE_HLO_RoundOp;
+
+def LHLO_RsqrtOp: LHLO_UnaryElementwiseOp<"rsqrt", LHLO_FpOrComplexBuffer>, BASE_HLO_RsqrtOp;
+
+def LHLO_SqrtOp: LHLO_UnaryElementwiseOp<"sqrt", LHLO_FpOrComplexBuffer>, BASE_HLO_SqrtOp;
+
+def LHLO_SignOp: LHLO_UnaryElementwiseOp<"sign">, BASE_HLO_SignOp;
+
+def LHLO_SinOp: LHLO_UnaryElementwiseOp<"sine", LHLO_FpOrComplexBuffer>, BASE_HLO_SinOp;
+
+def LHLO_TanhOp: LHLO_UnaryElementwiseOp<"tanh", LHLO_FpOrComplexBuffer>, BASE_HLO_TanhOp;
+
+//===----------------------------------------------------------------------===//
+// XLA binary elementwise op definitions.
+//===----------------------------------------------------------------------===//
+// See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations
+
+class LHLO_BinaryElementwiseOp<string mnemonic, Type BufferType = LHLO_Buffer,
+                               list<OpTrait> traits = [SameTypeOperands]> :
+        LHLO_Op<mnemonic, traits> {
+  let arguments = (ins
+      Arg<BufferType, "", [MemRead]>:$lhs,
+      Arg<BufferType, "", [MemRead]>:$rhs,
+      Arg<BufferType, "", [MemWrite]>:$out,
+      OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions
+  );
+}
+
+def LHLO_AddOp : LHLO_BinaryElementwiseOp<"add">, BASE_HLO_AddOp;
+
+def LHLO_AndOp: LHLO_BinaryElementwiseOp<"and", LHLO_PredOrIntBuffer>, BASE_HLO_AndOp;
+
+def LHLO_Atan2Op : LHLO_BinaryElementwiseOp<"atan2", LHLO_FpOrComplexBuffer>, BASE_HLO_Atan2Op;
+
+def LHLO_ComplexOp: LHLO_Op<"complex", [SameOperandsShape]>, BASE_HLO_ComplexOp {
+  let arguments = (ins
+      Arg<LHLO_FpBuffer, "", [MemRead]>:$lhs,
+      Arg<LHLO_FpBuffer, "", [MemRead]>:$rhs,
+      Arg<LHLO_ComplexBuffer, "", [MemWrite]>:$output,
+      OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions
+  );
+}
+
+def LHLO_DivOp : LHLO_BinaryElementwiseOp<"divide">, BASE_HLO_DivOp;
+
+def LHLO_MaxOp : LHLO_BinaryElementwiseOp<"maximum">, BASE_HLO_MaxOp;
+
+def LHLO_MinOp : LHLO_BinaryElementwiseOp<"minimum">, BASE_HLO_MinOp;
+
+def LHLO_MulOp : LHLO_BinaryElementwiseOp<"multiply">, BASE_HLO_MulOp;
+
+def LHLO_OrOp : LHLO_BinaryElementwiseOp<"or", LHLO_PredOrIntBuffer>, BASE_HLO_OrOp;
+
+def LHLO_PowOp : LHLO_BinaryElementwiseOp<"power">, BASE_HLO_PowOp;
+
+def LHLO_RemOp : LHLO_BinaryElementwiseOp<"remainder", LHLO_IntOrFpBuffer>, BASE_HLO_RemOp;
+
+def LHLO_ShiftLeftOp : LHLO_BinaryElementwiseOp<"shift_left", LHLO_IntBuffer>, BASE_HLO_ShiftLeftOp;
+
+def LHLO_ShiftRightArithmeticOp : LHLO_BinaryElementwiseOp<"shift_right_arithmetic", LHLO_IntBuffer>, BASE_HLO_ShiftRightArithmeticOp;
+
+def LHLO_ShiftRightLogicalOp : LHLO_BinaryElementwiseOp<"shift_right_logical", LHLO_IntBuffer>, BASE_HLO_ShiftRightLogicalOp;
+
+def LHLO_SubOp : LHLO_BinaryElementwiseOp<"subtract">, BASE_HLO_SubOp;
+
+def LHLO_XorOp : LHLO_BinaryElementwiseOp<"xor", LHLO_PredOrIntBuffer>, BASE_HLO_XorOp;
+
+//===----------------------------------------------------------------------===//
+// XLA control flow op definitions.
+//===----------------------------------------------------------------------===//
+
+// TODO(b/139813999): specify required function signature in a type-safe way.
+def LHLO_ReduceOp: LHLO_Op<"reduce", [
+      SameVariadicOperandSize,
+      SingleBlockImplicitTerminator<"TerminatorOp">
+    ]>, BASE_HLO_ReduceOp {
+  let arguments = (ins
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$operands,
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$init_values,
+    Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$out,
+    I64ElementsAttr:$dimensions
+  );
+
+  let regions = (region SizedRegion<1>:$body);
+}
+
+def LHLO_ReduceWindowOp: LHLO_Op<"reduce_window", [
+      SingleBlockImplicitTerminator<"TerminatorOp">
+    ]>, BASE_HLO_ReduceWindowOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$init_value,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$out,
+    I64ElementsAttr:$window_dimensions,
+    // If strides or dilations attributes are missing then the default value is
+    // one for each of the input dimensions. Similarly, padding values are zero
+    // for both low and high in each of the dimensions, if not specified.
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    OptionalAttr<I64ElementsAttr>:$base_dilations,
+    OptionalAttr<I64ElementsAttr>:$window_dilations,
+    OptionalAttr<I64ElementsAttr>:$padding
+  );
+
+  let regions = (region SizedRegion<1>:$body);
+}
+
+// TODO(timshen): Add a custom parser to hide operand_segment_sizes. For example,
+// A tuple-like pattern match syntax could work:
+// xla_lhlo.case %index, (%input0, %input1, %input2), (%output0, %output1) {
+//   ...
+// }, {
+//   ...
+// } : (type_input0, type_input1, type_input2, type_output0, type_output1) -> ()
+def LHLO_CaseOp: LHLO_Op<"case", [
+      AttrSizedOperandSegments,
+      SingleBlockImplicitTerminator<"TerminatorOp">
+    ]>, BASE_HLO_CaseOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$index,
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$branch_operands,
+    Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$out
+  );
+
+  let regions = (region VariadicRegion<SizedRegion<1>>:$branches);
+}
+
+// TODO(timshen): Add a custom syntax for this.
+def LHLO_WhileOp: LHLO_Op<"while", [SameVariadicOperandSize]>,
+                  BASE_HLO_WhileOp {
+  let arguments = (ins
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$val,
+    Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$output
+  );
+
+  let regions = (region SizedRegion<1>:$cond, SizedRegion<1>:$body);
+}
+
+//===----------------------------------------------------------------------===//
+// XLA tuple op definitions.
+//===----------------------------------------------------------------------===//
+
+def LHLO_CompareOp: LHLO_Op<"compare", []>, BASE_HLO_CompareOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$lhs,
+    Arg<LHLO_Buffer, "", [MemRead]>:$rhs,
+    Arg<LHLO_PredBuffer, "", [MemWrite]>:$out,
+    OptionalAttr<BroadcastDimAttr>:$broadcast_dimensions,
+    HLO_ComparisonDirectionAttr:$comparison_direction
+  );
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Slice definitions.
+//===----------------------------------------------------------------------===//
+
+def LHLO_SliceOp: LHLO_Op<
+      "slice",
+      [AllTypesMatch<["start_indices", "limit_indices", "strides"]>]> {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    I64ElementsAttr:$start_indices,
+    I64ElementsAttr:$limit_indices,
+    I64ElementsAttr:$strides
+  );
+}
+
+def HLO_DynamicUpdateSliceOp: LHLO_Op<"dynamic-update-slice", []> {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$update,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$start_indices
+  );
+}
+
+//===----------------------------------------------------------------------===//
+// StaticMemRefCastOp
+//===----------------------------------------------------------------------===//
+
+def HLO_StaticMemRefCastOp: Op<LHLO_Dialect, "static_memref_cast",
+    [NoSideEffect, DeclareOpInterfaceMethods<ViewLikeOpInterface>]> {
+  let summary = [{
+    "modifies the offset, sizes and strides of a statically shaped memref.
+  }];
+  let description = [{
+    Allows to modify the offset, sizes and strides of a statically shaped memref.
+
+    Example:
+    ```mlir
+    %buf_transformed =
+        xla_lhlo.static_memref_cast %buf
+        : memref<1x5xf32> -> memref<5xf32, offset: 2, strides: [1]>
+
+    // The result of the op is a rank-1 memref with `[5]` shape, stride 1 and
+    // offset 2.
+    ```
+  }];
+
+  let arguments = (ins Arg<LHLO_Buffer, "", []>:$operand);
+  let results = (outs Res<LHLO_Buffer, "", []>:$result);
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, MemRefType resultType, " #
+    "Value operand", [{
+       result.addOperands(operand);
+       result.types.push_back(resultType);
+     }]>];
+
+  let extraClassDeclaration = [{
+    MemRefType getType() { return getResult().getType().cast<MemRefType>(); }
+  }];
+
+  let verifier = [{ return Verify(*this); }];
+  let assemblyFormat = [{
+    $operand attr-dict `:` type($operand) `->` type($result)
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicMemRefCastOp
+//===----------------------------------------------------------------------===//
+
+def HLO_DynamicMemRefCastOp: Op<LHLO_Dialect, "dynamic_memref_cast",
+    [SameVariadicOperandSize, NoSideEffect,
+     DeclareOpInterfaceMethods<ViewLikeOpInterface>]> {
+  let summary = "dynamic memref cast operation";
+  let description = [{
+    Change sizes and strides of a memref using the values computed in runtime.
+
+    Example:
+    ```mlir
+    %buf_transformed =
+        xla_lhlo.dynamic_memref_cast %buf(%size_X, %size_Y)[%step_X, %step_Y]
+        : memref<?x?xf32> -> memref<?x?xf32, offset: 0, strides: [?, ?]>
+    // The result of the op is a type-erased memref with `[%size_X, %size_Y]`
+    // shape and `[%step_X, %step_Y]` strides. The offset will be inherited
+    // from the input.
+    ```
+  }];
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", []>:$operand,
+    Variadic<Index>:$sizes,
+    Variadic<Index>:$strides
+  );
+  let results = (outs Res<LHLO_Buffer, "", []>:$result);
+
+  let builders = [OpBuilder<
+    "OpBuilder &builder, OperationState &result, MemRefType resultType, " #
+    "Value operand, ValueRange sizes, ValueRange strides", [{
+       result.addOperands(operand);
+       result.addOperands(sizes);
+       result.addOperands(strides);
+       result.types.push_back(resultType);
+     }]>];
+
+  let extraClassDeclaration = [{
+    MemRefType getType() { return getResult().getType().cast<MemRefType>(); }
+  }];
+
+  let verifier = [{ return Verify(*this); }];
+  let assemblyFormat = [{
+    $operand `(` $sizes `)` `[` $strides `]` attr-dict `:` type($operand) `->`
+    type($result)
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// XLA Other op definitions.
+//===----------------------------------------------------------------------===//
+
+def LHLO_BatchNormGradOp : LHLO_Op<"batch_norm_grad", []>,
+    BASE_HLO_BatchNormGradOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
+    Arg<LHLO_Buffer, "", [MemRead]>:$mean,
+    Arg<LHLO_Buffer, "", [MemRead]>:$variance,
+    Arg<LHLO_Buffer, "", [MemRead]>:$grad_output,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_operand,  // gradient of $operand.
+    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_scale,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$grad_offset,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+
+}
+
+def LHLO_BatchNormInferenceOp : LHLO_Op<"batch_norm_inference", []>,
+    BASE_HLO_BatchNormInferenceOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
+    Arg<LHLO_Buffer, "", [MemRead]>:$offset,
+    Arg<LHLO_Buffer, "", [MemRead]>:$mean,
+    Arg<LHLO_Buffer, "", [MemRead]>:$variance,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+}
+
+def LHLO_BatchNormTrainingOp : LHLO_Op<"batch_norm_training", []>,
+    BASE_HLO_BatchNormTrainingOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$scale,
+    Arg<LHLO_Buffer, "", [MemRead]>:$offset,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$batch_mean,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$batch_var,
+    F32Attr:$epsilon,
+    I64Attr:$feature_index
+  );
+}
+
+// TODO(timshen): add a custom verifier.
+def LHLO_BitcastOp: LHLO_Op<"bitcast", []> {
+  let arguments = (ins Arg<LHLO_Buffer, "", [MemRead]>:$input,
+                       Arg<LHLO_Buffer, "", [MemWrite]>:$output);
+}
+
+def LHLO_BroadcastOp : LHLO_Op<"broadcast",
+      []>, BASE_HLO_BroadcastOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    I64ElementsAttr:$broadcast_sizes
+  );
+}
+
+def LHLO_BroadcastInDimOp : LHLO_Op<"broadcast_in_dim",
+      []>, BASE_HLO_BroadcastInDimOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    BroadcastDimAttr:$broadcast_dimensions
+  );
+}
+
+def LHLO_ClampOp : LHLO_Op<"clamp", []>, BASE_HLO_ClampOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$min,
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$max,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_ConcatenateOp : LHLO_Op<"concatenate", []>, BASE_HLO_ConcatenateOp {
+   let arguments = (ins
+     Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$val,
+     Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+     I64Attr:$dimension
+   );
+}
+
+// TODO(bondhugula): Make this struct dialect independent so that it can be
+// shared between the HLO and LHLO dialects.
+def ConvDimensionNumbers : StructAttr<"ConvDimensionNumbers", LHLO_Dialect, [
+  StructFieldAttr<"input_batch_dimension",I64Attr>,
+  StructFieldAttr<"input_feature_dimension", I64Attr>,
+  StructFieldAttr<"input_spatial_dimensions", I64ElementsAttr>,
+  StructFieldAttr<"kernel_input_feature_dimension", I64Attr>,
+  StructFieldAttr<"kernel_output_feature_dimension", I64Attr>,
+  StructFieldAttr<"kernel_spatial_dimensions", I64ElementsAttr>,
+  StructFieldAttr<"output_batch_dimension", I64Attr>,
+  StructFieldAttr<"output_feature_dimension", I64Attr>,
+  StructFieldAttr<"output_spatial_dimensions", I64ElementsAttr>] > {
+
+  let description = "Structure of dimension information for conv op";
+}
+
+def LHLO_ConvOp : LHLO_Op<"convolution", []>, BASE_HLO_ConvOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$lhs,
+    Arg<LHLO_Buffer, "", [MemRead]>:$rhs,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    // Default value: zero for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$padding,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$lhs_dilation,
+    // Default value: one for each of the spatial dimension.
+    OptionalAttr<I64ElementsAttr>:$rhs_dilation,
+    ConvDimensionNumbers:$dimension_numbers,
+    I64Attr:$feature_group_count,
+    I64Attr:$batch_group_count,
+    HLO_PrecisionConfigAttr:$precision_config
+  );
+}
+
+def LHLO_CopyOp: LHLO_Op<"copy", []>, BASE_HLO_CopyOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_DotOp: LHLO_Op<"dot", []>, BASE_HLO_DotOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$lhs,
+    Arg<LHLO_Buffer, "", [MemRead]>:$rhs,
+    HLO_PrecisionConfigAttr:$precision_config,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_GatherOp: LHLO_Op<"gather", []>, BASE_HLO_GatherOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_IntBuffer, "", [MemRead]>:$start_indices,
+    I64Attr:$index_vector_dim,
+    I64ElementsAttr:$offset_dims,
+    I64ElementsAttr:$slice_sizes,
+    I64ElementsAttr:$collapsed_slice_dims,
+    I64ElementsAttr:$start_index_map,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_ReshapeOp: LHLO_Op<"reshape", []>, BASE_HLO_ReshapeOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_ScatterOp: LHLO_Op<"scatter", []>, BASE_HLO_ScatterOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$scatter_indices,
+    Arg<LHLO_Buffer, "", [MemRead]>:$updates,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    ScatterDimensionNumbers<LHLO_Dialect>:$scatter_dimension_numbers,
+    DefaultValuedAttr<BoolAttr, "false">:$indices_are_sorted,
+    DefaultValuedAttr<BoolAttr, "false">:$unique_indices
+  );
+
+  let regions = (region SizedRegion<1>:$update_computation);
+}
+
+def LHLO_SelectOp: LHLO_Op<"select", []>, BASE_HLO_SelectOp {
+  let arguments = (ins
+    Arg<LHLO_PredBuffer, "", [MemRead]>:$pred,
+    Arg<LHLO_Buffer, "", [MemRead]>:$on_true,
+    Arg<LHLO_Buffer, "", [MemRead]>:$on_false,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_SelectAndScatterOp: LHLO_Op<"select_and_scatter", []>,
+      BASE_HLO_SelectAndScatterOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$source,
+    Arg<LHLO_Buffer, "", [MemRead]>:$init_value,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$out,
+    OptionalAttr<I64ElementsAttr>:$window_dimensions,
+    OptionalAttr<I64ElementsAttr>:$window_strides,
+    OptionalAttr<I64ElementsAttr>:$padding
+  );
+
+  let regions = (region SizedRegion<1>:$select, SizedRegion<1>:$scatter);
+}
+
+def LHLO_ReverseOp: LHLO_Op<"reverse", []>, BASE_HLO_ReverseOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    I64ElementsAttr:$dimensions,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_PadOp: LHLO_Op<"pad", []>, BASE_HLO_PadOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemRead]>:$padding_value,
+    I64ElementsAttr:$edge_padding_low,
+    I64ElementsAttr:$edge_padding_high,
+    I64ElementsAttr:$interior_padding,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_TransposeOp: LHLO_Op<"transpose", []>, BASE_HLO_TransposeOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    I64ElementsAttr:$permutation,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output
+  );
+}
+
+def LHLO_ReducePrecisionOp: LHLO_Op<"reduce_precision", [SameTypeOperands]>,
+                            BASE_HLO_ReducePrecisionOp {
+  let arguments = (ins
+    Arg<LHLO_FpBuffer, "", [MemRead]>:$operand,
+    Arg<LHLO_FpBuffer, "", [MemWrite]>:$output,
+    I32Attr:$exponent_bits,
+    I32Attr:$mantissa_bits
+  );
+}
+
+def LHLO_AllReduceOp : LHLO_Op<"all_reduce", [SameTypeOperands]>,
+                       BASE_HLO_AllReduceOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    I64ElementsAttr:$replica_groups,
+    DefaultValuedAttr<BoolAttr, "false">:$constrain_layout,
+    OptionalAttr<ChannelHandle<LHLO_Dialect>>:$channel_id,
+    DefaultValuedAttr<BoolAttr, "false">:$use_global_device_ids
+  );
+  let regions = (region SizedRegion<1>:$computation);
+}
+
+def LHLO_CollectivePermuteOp: LHLO_Op<"collective_permute", [SameTypeOperands]>,
+                              BASE_HLO_CollectivePermuteOp {
+
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    I64ElementsAttr:$source_target_pairs,
+    OptionalAttr<ChannelHandle<LHLO_Dialect>>:$channel_id
+  );
+}
+
+def LHLO_FftOp: LHLO_Op<"fft", []>, BASE_HLO_FftOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    HLO_FftTypeAttr:$fft_type,
+    I64ElementsAttr:$fft_length
+  );
+}
+
+def LHLO_CholeskyOp: LHLO_Op<"cholesky", [SameOperandsElementType]>, BASE_HLO_CholeskyOp {
+  let arguments = (ins
+    Arg<LHLO_FpOrComplexBuffer, "", [MemRead]>:$a,
+    Arg<LHLO_FpOrComplexBuffer, "", [MemWrite]>:$output,
+    DefaultValuedAttr<BoolAttr, "false">:$lower
+  );
+}
+
+def LHLO_Infeed: LHLO_Op<"infeed", []>, BASE_HLO_InfeedOp {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    DefaultValuedAttr<StrAttr, "">:$config
+  );
+}
+
+def LHLO_Outfeed: LHLO_Op<"outfeed", []> {
+  let arguments = (ins
+    Arg<LHLO_Buffer, "", [MemRead]>:$operand,
+    DefaultValuedAttr<StrAttr, "">:$config
+  );
+}
+
+def LHLO_ReplicaIdOp : LHLO_Op<"replica_id", []>, BASE_HLO_ReplicaIdOp {
+  let arguments = (ins Arg<MemRefOf<[UI32]>, "", [MemWrite]>);
+}
+
+def LHLO_TriangularSolveOp: LHLO_Op<"triangular_solve", [SameOperandsElementType]>,
+                            BASE_HLO_TriangularSolveOp {
+  let arguments = (ins
+    Arg<LHLO_FpOrComplexBuffer, "", [MemRead]>:$a,
+    Arg<LHLO_FpOrComplexBuffer, "", [MemRead]>:$b,
+    Arg<LHLO_FpOrComplexBuffer, "", [MemWrite]>:$output,
+    BoolAttr:$left_side,
+    BoolAttr:$lower,
+    BoolAttr:$unit_diagonal,
+    HLO_TransposeAttr:$transpose_a
+  );
+}
+
+// TODO(timshen): add a custom verifier.
+def LHLO_MapOp: LHLO_Op<"map", [SameOperandsShape]>, BASE_HLO_MapOp {
+  let arguments = (ins
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$operands,
+    Arg<LHLO_Buffer, "", [MemWrite]>:$output,
+    I64ElementsAttr:$dimensions
+  );
+  let regions = (region SizedRegion<1>:$computation);
+}
+
+def LHLO_RngGetAndUpdateStateOp: LHLO_Op<"rng_get_and_update_state", []> {
+  let arguments = (ins
+    Arg<MemRefOf<[UI64]>, "", [MemRead, MemWrite]>:$state,
+    I64Attr:$delta
+  );
+}
+
+// TODO(timshen): add a custom verifier.
+def LHLO_SortOp: LHLO_Op<"sort", [SameVariadicOperandSize, SameOperandsShape]>, BASE_HLO_SortOp {
+  let arguments = (ins
+    Arg<Variadic<LHLO_Buffer>, "", [MemRead]>:$operands,
+    Arg<Variadic<LHLO_Buffer>, "", [MemWrite]>:$output,
+    DefaultValuedAttr<I64Attr, "-1">:$dimension,
+    DefaultValuedAttr<BoolAttr, "false">:$is_stable
+  );
+
+  let regions = (region SizedRegion<1>:$comparator);
+}
+
+//===----------------------------------------------------------------------===//
+// Late operations
+//===----------------------------------------------------------------------===//
+
+def FusionOp : LHLO_Op<"fusion", [SingleBlockImplicitTerminator<"TerminatorOp">]> {
+  let summary = "Fusion operator";
+  let description = [{
+    Models the fusion instruction generated by the XLA compiler's fusion pass.
+
+    Fusion instructions are generated by the fusion pass of the XLA compiler.
+    They serve as a hint to the backend that it is beneficial to emit the
+    contained instructions into a single loop nest or kernel. The XLA fusion
+    pass is designed such that it only generates fusion nodes that can be
+    handled by the XLA compilers backends.
+    The XLA runtime expects this hint to be followed, as it expects a single
+    kernel per HLO instruction. This restriction might be lifted in the future.
+  }];
+  let regions = (region SizedRegion<1>:$region);
+
+  let skipDefaultBuilders = 1;
+  let builders = [
+     OpBuilder<"OpBuilder &builder, OperationState &result, "
+               "ArrayRef<NamedAttribute> attributes">
+   ];
+}
+
+def TerminatorOp :
+    LHLO_Op<"terminator", [Terminator]> {
+  let summary = "LHLO termination operation";
+  let description = [{
+    Terminator operation for the LHLO dialect.
+  }];
+  let builders = [OpBuilder<
+    "OpBuilder &b, OperationState &result, ValueRange operands",
+    [{ build(b, result, llvm::None, operands, llvm::None); }]
+  >];
+}
+
+#endif // LHLO_OPS
diff --git a/include/mlir-hlo/utils/broadcast_utils.h b/include/mlir-hlo/utils/broadcast_utils.h
new file mode 100644
index 0000000..957d5f9
--- /dev/null
+++ b/include/mlir-hlo/utils/broadcast_utils.h
@@ -0,0 +1,49 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_BROADCAST_UTILS_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_BROADCAST_UTILS_H_
+
+// Utilities relating to implementing HLO broadcasting.
+// Note: This file should not depend on any non-MLIR TensorFlow libraries.
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Interfaces/InferTypeOpInterface.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Support/LLVM.h"
+
+namespace mlir {
+namespace xla {
+
+// Checks whether the given operand types and broadcast_dims attr represent a
+// legal combination for "numpy" style broadcasting (where 1-dims are prepended
+// to the smaller ranked operand until it is of the same rank as the larger).
+// See: https://docs.scipy.org/doc/numpy/reference/ufuncs.html
+bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs,
+                                 DenseIntElementsAttr broadcast_dims);
+
+// Emits shape dialect ops to compute the result shape for a broadcasting
+// binary elementwise op which broadcasts according to "numpy" semantics
+// (see above), returning an extents tensor of the resulting shape.
+Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs,
+                                                        Value rhs,
+                                                        OpBuilder& builder);
+
+}  // namespace xla
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_IR_BROADCAST_UTILS_H_
diff --git a/include/mlir-hlo/utils/convert_op_folder.h b/include/mlir-hlo/utils/convert_op_folder.h
new file mode 100644
index 0000000..dcda285
--- /dev/null
+++ b/include/mlir-hlo/utils/convert_op_folder.h
@@ -0,0 +1,33 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+
+namespace mlir {
+namespace xla {
+
+// Converts the given elements attr to the specified elements type.
+// Requires type of the elements and new_type to be either integer or float
+// type.
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type);
+}  // namespace xla
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
diff --git a/include/mlir-hlo/utils/hlo_utils.h b/include/mlir-hlo/utils/hlo_utils.h
new file mode 100644
index 0000000..cfb7184
--- /dev/null
+++ b/include/mlir-hlo/utils/hlo_utils.h
@@ -0,0 +1,74 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_UTILS_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_UTILS_H_
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/PatternMatch.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/TypeUtilities.h"
+
+namespace mlir {
+namespace xla {
+
+// Computes the broadcast dimensions attr for an elementwise binary operator
+// between two ranked tensors.
+// If `allow_empty` is true, then null can be returned to mean that the
+// broadcast is an "identity".
+mlir::DenseIntElementsAttr getBroadcastDimensionsAttr(mlir::Builder* b,
+                                                      mlir::Value x,
+                                                      mlir::Value y,
+                                                      bool allow_empty = true);
+
+// Get a constant splat for the given value of type. Requires value to be of
+// type static shaped RankedTensorType.
+template <typename T>
+static ElementsAttr getSplat(Builder* b, RankedTensorType ty, T constant) {
+  Type element_ty = getElementTypeOrSelf(ty);
+
+  if (element_ty.isSignlessInteger())
+    return DenseElementsAttr::get(ty, b->getIntegerAttr(element_ty, constant));
+
+  if (element_ty.isa<FloatType>())
+    return DenseElementsAttr::get(ty, b->getFloatAttr(element_ty, constant));
+
+  if (auto complex_ty = element_ty.dyn_cast<ComplexType>()) {
+    auto complex_element_ty = complex_ty.getElementType();
+    if (complex_element_ty.isF32())
+      return DenseElementsAttr::get(ty,
+                                    static_cast<std::complex<float>>(constant));
+    if (complex_element_ty.isF64())
+      return DenseElementsAttr::get(
+          ty, static_cast<std::complex<double>>(constant));
+  }
+  llvm_unreachable("unhandled element type");
+}
+
+template <typename T>
+static ElementsAttr getSplat(Builder* b, Value val, T constant) {
+  return getSplat(b, val.getType().cast<RankedTensorType>(), constant);
+}
+
+// Returns DenseElementsAttr of rank zero with the given element type and the
+// value.
+// Requires `ty` to be either FloatType of IntegerType.
+DenseElementsAttr GetScalarOfType(Type ty, int64_t raw_value);
+
+}  // namespace xla
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_UTILS_H_
diff --git a/lib/Dialect/mhlo/IR/chlo_ops.cc b/lib/Dialect/mhlo/IR/chlo_ops.cc
new file mode 100644
index 0000000..fd2e441
--- /dev/null
+++ b/lib/Dialect/mhlo/IR/chlo_ops.cc
@@ -0,0 +1,278 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Diagnostics.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/TypeUtilities.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h"
+
+namespace mlir {
+namespace xla_chlo {
+
+template <typename T>
+static LogicalResult Verify(T op) {
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// BinaryOps
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Gets the resulting type from a broadcast between two types.
+static Type GetBroadcastType(Type x, Type y, Type element_type,
+                             DenseIntElementsAttr broadcast_dimensions_attr) {
+  auto x_ranked = x.dyn_cast<RankedTensorType>();
+  auto y_ranked = y.dyn_cast<RankedTensorType>();
+  if (!x_ranked || !y_ranked) {
+    return UnrankedTensorType::get(element_type);
+  }
+
+  auto shape_x = x_ranked.getShape();
+  auto shape_y = y_ranked.getShape();
+
+  if (shape_x.size() == shape_y.size()) {
+    llvm::SmallVector<int64_t, 4> out_shape(shape_x.size());
+    for (int i = 0, e = shape_x.size(); i < e; i++) {
+      auto x_val = shape_x[i];
+      auto y_val = shape_y[i];
+      if (x_val == -1 || y_val == -1) {
+        out_shape[i] = -1;
+      } else {
+        out_shape[i] = std::max(x_val, y_val);
+      }
+    }
+    return RankedTensorType::get(out_shape, element_type);
+  }
+
+  auto shape_large = shape_x.size() > shape_y.size() ? shape_x : shape_y;
+  auto shape_small = shape_x.size() <= shape_y.size() ? shape_x : shape_y;
+
+  llvm::SmallVector<int64_t, 4> broadcast_dimensions;
+  if (broadcast_dimensions_attr) {
+    // Explicit broadcast dimensions.
+    for (const APInt& int_value : broadcast_dimensions_attr.getIntValues()) {
+      broadcast_dimensions.push_back(int_value.getSExtValue());
+    }
+    if (broadcast_dimensions.size() != shape_small.size()) {
+      // Signal illegal broadcast_dimensions as unranked.
+      return UnrankedTensorType::get(element_type);
+    }
+  } else {
+    // If no broadcast dimensions, assume "numpy" broadcasting.
+    broadcast_dimensions = llvm::to_vector<4>(llvm::seq<int64_t>(
+        shape_large.size() - shape_small.size(), shape_large.size()));
+  }
+
+  llvm::SmallVector<int64_t, 4> out_shape(shape_large.begin(),
+                                          shape_large.end());
+
+  // Update according to the broadcast dimensions.
+  for (auto index_pair : llvm::enumerate(broadcast_dimensions)) {
+    auto old_value = out_shape[index_pair.value()];
+    auto new_value = shape_small[index_pair.index()];
+    if (old_value != -1 && (new_value == -1 || new_value > old_value)) {
+      out_shape[index_pair.value()] = new_value;
+    }
+  }
+
+  return RankedTensorType::get(out_shape, element_type);
+}
+
+LogicalResult InferBroadcastBinaryOpReturnTypeComponents(
+    MLIRContext* context, Optional<Location> location, ValueRange operands,
+    DictionaryAttr attributes, Type element_type,
+    SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {
+  // Find broadcast_dimensions.
+  DenseIntElementsAttr broadcast_dimensions =
+      attributes.get("broadcast_dimensions")
+          .dyn_cast_or_null<DenseIntElementsAttr>();
+
+  ShapedType lhs_type = operands[0].getType().dyn_cast<ShapedType>();
+  ShapedType rhs_type = operands[1].getType().dyn_cast<ShapedType>();
+  if (!lhs_type || !rhs_type ||
+      lhs_type.getElementType() != rhs_type.getElementType()) {
+    return emitOptionalError(location, "mismatched operand types");
+  }
+  if (!element_type) element_type = lhs_type.getElementType();
+  Type result_type =
+      GetBroadcastType(lhs_type, rhs_type, element_type, broadcast_dimensions);
+
+  if (auto ranked_result_type = result_type.dyn_cast<RankedTensorType>()) {
+    inferedReturnShapes.emplace_back(ranked_result_type.getShape(),
+                                     element_type);
+    return success();
+  }
+
+  // TODO(laurenzo): This should be constructing with `element_type` but that
+  // constructor variant needs to be added upstream.
+  inferedReturnShapes.emplace_back(/* element_type */);
+  return success();
+}
+
+LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes(
+    OpBuilder& builder, Operation* op,
+    SmallVectorImpl<Value>& reifiedReturnShapes) {
+  auto loc = op->getLoc();
+  auto lhs = op->getOperand(0);
+  auto rhs = op->getOperand(1);
+
+  // Check for "numpy"-style rank broadcast.
+  auto broadcast_dimensions = op->getAttr("broadcast_dimensions")
+                                  .dyn_cast_or_null<DenseIntElementsAttr>();
+  if (broadcast_dimensions &&
+      !xla::IsLegalNumpyRankedBroadcast(lhs, rhs, broadcast_dimensions)) {
+    // Note: It is unclear whether the general specification of explicit
+    // broadcast_dimensions on binary ops is a feature we want to carry
+    // forward. While it can technically be implemented for ranked-dynamic,
+    // it is incompatible with unranked inputs. If this warning is emitted
+    // in real programs, it is an indication that the feature should be
+    // implemented versus just falling back on the more standard definition
+    // of numpy-like prefix-padding.
+    return op->emitWarning()
+           << "unsupported non prefix-padded dynamic rank "
+           << "broadcast_dimensions = " << broadcast_dimensions;
+  }
+
+  Value computed_shape = xla::ComputeBinaryElementwiseBroadcastingResultExtents(
+      loc, lhs, rhs, builder);
+  if (!computed_shape) return failure();
+  reifiedReturnShapes.push_back(computed_shape);
+  return success();
+}
+}  // namespace
+
+//===----------------------------------------------------------------------===//
+// BroadcastComplexOp (has custom type inference due to different result type).
+//===----------------------------------------------------------------------===//
+
+LogicalResult BroadcastComplexOp::inferReturnTypeComponents(
+    MLIRContext* context, Optional<Location> location, ValueRange operands,
+    DictionaryAttr attributes, RegionRange regions,
+    SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {
+  ShapedType lhs_type = operands[0].getType().dyn_cast<ShapedType>();
+  if (!lhs_type) {
+    return emitOptionalError(location, "expected ShapedType");
+  }
+  Type element_type = ComplexType::get(lhs_type.getElementType());
+  return InferBroadcastBinaryOpReturnTypeComponents(context, location, operands,
+                                                    attributes, element_type,
+                                                    inferedReturnShapes);
+}
+LogicalResult BroadcastComplexOp::reifyReturnTypeShapes(
+    OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes) {
+  return ReifyBroadcastBinaryOpReturnTypeShapes(builder, getOperation(),
+                                                reifiedReturnShapes);
+}
+
+//===----------------------------------------------------------------------===//
+// BroadcastCompareOp (has custom type inference due to different result type).
+//===----------------------------------------------------------------------===//
+
+void BroadcastCompareOp::build(OpBuilder& builder, OperationState& result,
+                               Value lhs, Value rhs,
+                               DenseIntElementsAttr broadcast_dimensions,
+                               StringAttr comparison_direction) {
+  auto new_type = GetBroadcastType(lhs.getType(), rhs.getType(),
+                                   builder.getI1Type(), broadcast_dimensions);
+  build(builder, result, new_type, lhs, rhs, broadcast_dimensions,
+        comparison_direction);
+}
+
+LogicalResult BroadcastCompareOp::inferReturnTypeComponents(
+    MLIRContext* context, Optional<Location> location, ValueRange operands,
+    DictionaryAttr attributes, RegionRange regions,
+    SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {
+  Type element_type = IntegerType::get(1, context);
+  return InferBroadcastBinaryOpReturnTypeComponents(context, location, operands,
+                                                    attributes, element_type,
+                                                    inferedReturnShapes);
+}
+LogicalResult BroadcastCompareOp::reifyReturnTypeShapes(
+    OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes) {
+  return ReifyBroadcastBinaryOpReturnTypeShapes(builder, getOperation(),
+                                                reifiedReturnShapes);
+}
+
+//===----------------------------------------------------------------------===//
+// Macros for method definitions that are common to most broadcasting ops.
+//===----------------------------------------------------------------------===//
+
+#define BROADCAST_INFER_SHAPE_TYPE_OP_DEFS(Op)                                \
+  LogicalResult Op::inferReturnTypeComponents(                                \
+      MLIRContext* context, Optional<Location> location, ValueRange operands, \
+      DictionaryAttr attributes, RegionRange regions,                         \
+      SmallVectorImpl<ShapedTypeComponents>& inferedReturnShapes) {           \
+    return InferBroadcastBinaryOpReturnTypeComponents(                        \
+        context, location, operands, attributes, /*element_type=*/nullptr,    \
+        inferedReturnShapes);                                                 \
+  }                                                                           \
+  LogicalResult Op::reifyReturnTypeShapes(                                    \
+      OpBuilder& builder, SmallVectorImpl<Value>& reifiedReturnShapes) {      \
+    return ReifyBroadcastBinaryOpReturnTypeShapes(builder, getOperation(),    \
+                                                  reifiedReturnShapes);       \
+  }
+
+#define BROADCAST_BINARY_OP_DEFS(Op)                                           \
+  void Op::build(OpBuilder& builder, OperationState& result, Value left,       \
+                 Value right, DenseIntElementsAttr broadcast_dimensions) {     \
+    auto type = GetBroadcastType(                                              \
+        left.getType().cast<ShapedType>(), right.getType().cast<ShapedType>(), \
+        getElementTypeOrSelf(right.getType()), broadcast_dimensions);          \
+    return Op::build(builder, result, type, left, right,                       \
+                     broadcast_dimensions);                                    \
+  }                                                                            \
+  BROADCAST_INFER_SHAPE_TYPE_OP_DEFS(Op)
+
+BROADCAST_BINARY_OP_DEFS(BroadcastAddOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastAndOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastAtan2Op);
+BROADCAST_BINARY_OP_DEFS(BroadcastDivOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastMaxOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastMinOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastMulOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastOrOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastPowOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastRemOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastShiftLeftOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastShiftRightArithmeticOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastShiftRightLogicalOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastSubOp);
+BROADCAST_BINARY_OP_DEFS(BroadcastXorOp);
+
+#undef BROADCAST_INFER_SHAPE_TYPE_OP_DEFS
+#undef BROADCAST_BINARY_OP_DEFS
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc"
+
+//===----------------------------------------------------------------------===//
+// xla_chlo Dialect Constructor
+//===----------------------------------------------------------------------===//
+
+XlaHloClientDialect::XlaHloClientDialect(MLIRContext* context)
+    : Dialect(getDialectNamespace(), context) {
+  addOperations<
+#define GET_OP_LIST
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc"
+      >();
+}
+
+}  // namespace xla_chlo
+}  // namespace mlir
diff --git a/lib/Dialect/mhlo/IR/dialect_registration.cc b/lib/Dialect/mhlo/IR/dialect_registration.cc
new file mode 100644
index 0000000..855c026
--- /dev/null
+++ b/lib/Dialect/mhlo/IR/dialect_registration.cc
@@ -0,0 +1,24 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
+
+// Static initialization for XLA dialect registration.
+static mlir::DialectRegistration<mlir::xla_hlo::XlaHloDialect> xla_hlo_ops;
+static mlir::DialectRegistration<mlir::xla_chlo::XlaHloClientDialect>
+    xla_chlo_ops;
+static mlir::DialectRegistration<mlir::xla_lhlo::XlaLhloDialect> xla_lhlo_ops;
diff --git a/lib/Dialect/mhlo/IR/hlo_ops.cc b/lib/Dialect/mhlo/IR/hlo_ops.cc
new file mode 100644
index 0000000..0130f4b
--- /dev/null
+++ b/lib/Dialect/mhlo/IR/hlo_ops.cc
@@ -0,0 +1,2110 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines the operations used in the XLA dialect.
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <functional>
+
+#include "third_party/absl/container/flat_hash_set.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/APFloat.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/APInt.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/ArrayRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/STLExtras.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/iterator_range.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/Support/Casting.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/Support/FormatVariadic.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/Support/MathExtras.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Dialect.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Matchers.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpDefinition.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpImplementation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OperationSupport.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/PatternMatch.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/TypeUtilities.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Types.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Value.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Support/LLVM.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Support/LogicalResult.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Transforms/InliningUtils.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h"
+
+namespace mlir {
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.cc.inc"
+namespace xla_hlo {
+
+Operation* XlaHloDialect::materializeConstant(OpBuilder& builder,
+                                              Attribute value, Type type,
+                                              Location loc) {
+  // HLO dialect constants only support ElementsAttr unlike standard dialect
+  // constant which supports all attributes.
+  if (value.isa<ElementsAttr>())
+    return builder.create<xla_hlo::ConstOp>(loc, type,
+                                            value.cast<ElementsAttr>());
+  return nullptr;
+}
+
+template <typename T>
+static LogicalResult Verify(T op) {
+  return success();
+}
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Utilities for the canonicalize patterns
+//===----------------------------------------------------------------------===//
+
+// Returns 1D 64-bit dense elements attribute with the given values.
+DenseIntElementsAttr GetI64ElementsAttr(ArrayRef<int64_t> values,
+                                        Builder* builder) {
+  RankedTensorType ty = RankedTensorType::get(
+      {static_cast<int64_t>(values.size())}, builder->getIntegerType(64));
+  return DenseIntElementsAttr::get(ty, values);
+}
+
+// Given the start indices and slice sizes for a dynamic-slice that can be
+// converted to a static slice, returns the limits for the static slice.
+DenseIntElementsAttr BuildSliceLimits(DenseIntElementsAttr start_indices,
+                                      DenseIntElementsAttr slice_sizes,
+                                      Builder* builder) {
+  SmallVector<int64_t, 4> slice_limits;
+  for (int64_t i = 0; i < slice_sizes.getNumElements(); ++i) {
+    int64_t start_index = start_indices.getValue<IntegerAttr>(i).getInt();
+    int64_t slice_size = slice_sizes.getValue<IntegerAttr>(i).getInt();
+    slice_limits.push_back(start_index + slice_size);
+  }
+  return GetI64ElementsAttr(slice_limits, builder);
+}
+
+#include "third_party/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/generated_canonicalize.inc"
+}  // namespace
+
+//===----------------------------------------------------------------------===//
+// ConstOp
+//===----------------------------------------------------------------------===//
+
+static void Print(ConstOp op, OpAsmPrinter* printer) {
+  // Print op name.
+  *printer << op.getOperationName();
+
+  // Elide attribute value while printing the attribute dictionary.
+  SmallVector<StringRef, 1> elided_attrs;
+  elided_attrs.push_back("value");
+  printer->printOptionalAttrDict(op.getAttrs(), elided_attrs);
+
+  *printer << ' ' << op.value();
+}
+
+static ParseResult ParseConstOp(OpAsmParser* parser, OperationState* result) {
+  if (parser->parseOptionalAttrDict(result->attributes)) return failure();
+
+  // If colon is not present after attribute dictionary, it should be short form
+  // and attribute 'value' is outside the dictionary.
+  if (failed(parser->parseOptionalColon())) {
+    Attribute value;
+    if (parser->parseAttribute(value, "value", result->attributes))
+      return failure();
+    return parser->addTypeToList(value.getType(), result->types);
+  }
+
+  // Long form should have type of the result after colon.
+  Type ty;
+  if (parser->parseType(ty)) return failure();
+  result->types.push_back(ty);
+  return success();
+}
+
+OpFoldResult ConstOp::fold(ArrayRef<Attribute> operands) {
+  assert(operands.empty() && "constant has no operands");
+
+  // Return the held attribute value.
+  return value();
+}
+
+// Builds a constant op with the specified attribute `value`.
+void ConstOp::build(OpBuilder& builder, OperationState& result,
+                    Attribute value) {
+  Type type;
+  if (auto elemAttr = value.dyn_cast<ElementsAttr>()) {
+    type = elemAttr.getType();
+  } else if (value.isa<BoolAttr>() || value.isa<FloatAttr>() ||
+             value.isa<IntegerAttr>()) {
+    // All XLA types must be tensor types. In the build() method, we want to
+    // provide more flexibility by allowing attributes of scalar types. But we
+    // need to wrap it up with ElementsAttr to construct valid XLA constants.
+    type = RankedTensorType::get(/*shape=*/{}, value.getType());
+    value = DenseElementsAttr::get(type.cast<TensorType>(), value);
+  }
+
+  // TODO: support other XLA specific types.
+  assert(type && "unsupported attribute type for building xla_hlo.constant");
+  result.types.push_back(type);
+  result.addAttribute("value", value);
+}
+
+//===----------------------------------------------------------------------===//
+// DotGeneralOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(DotGeneralOp op) {
+  auto dot_dimension_numbers = op.dot_dimension_numbers();
+  int64_t lhs_batching_dimensions_size = llvm::size(
+      dot_dimension_numbers.lhs_batching_dimensions().getValues<int64_t>());
+  int64_t rhs_batching_dimensions_size = llvm::size(
+      dot_dimension_numbers.rhs_batching_dimensions().getValues<int64_t>());
+  if (lhs_batching_dimensions_size != rhs_batching_dimensions_size) {
+    return op.emitError()
+           << "lhs and rhs should have the same number of batching dimensions";
+  }
+  int64_t lhs_contracting_dimensions_size = llvm::size(
+      dot_dimension_numbers.lhs_contracting_dimensions().getValues<int64_t>());
+  int64_t rhs_contracting_dimensions_size = llvm::size(
+      dot_dimension_numbers.rhs_contracting_dimensions().getValues<int64_t>());
+  if (lhs_contracting_dimensions_size != rhs_contracting_dimensions_size) {
+    return op.emitError() << "lhs and rhs should have the same number of "
+                             "contracting dimensions";
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// IotaOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(IotaOp op) {
+  auto shape = op.getType().cast<ShapedType>();
+  if (!shape.hasRank()) return success();
+
+  if (shape.getRank() == 0)
+    return op.emitOpError() << "does not support scalars.";
+
+  auto iota_dimension = op.iota_dimension().getSExtValue();
+  if (iota_dimension >= shape.getRank() || iota_dimension < 0)
+    return op.emitOpError() << "iota dimension cannot go beyond the output "
+                               "rank or be negative.";
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicIotaOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct DynamicIotaIsStatic : public OpRewritePattern<DynamicIotaOp> {
+  using OpRewritePattern<DynamicIotaOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(DynamicIotaOp iota,
+                                PatternRewriter& rewriter) const override {
+    auto result_ty = iota.getType().cast<ShapedType>();
+    if (!result_ty.hasStaticShape()) {
+      return failure();
+    }
+
+    rewriter.replaceOpWithNewOp<IotaOp>(iota, result_ty, iota.iota_dimension());
+    return success();
+  }
+};
+
+}  // namespace
+
+void DynamicIotaOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<DynamicIotaIsStatic>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// AbsOp
+//===----------------------------------------------------------------------===//
+
+void AbsOp::build(OpBuilder& builder, OperationState& result, Value operand) {
+  auto shaped_type = operand.getType().cast<ShapedType>();
+  Type new_type;
+  if (!shaped_type.getElementType().isa<ComplexType>()) {
+    new_type = operand.getType();
+  } else if (shaped_type.hasRank()) {
+    new_type = RankedTensorType::get(shaped_type.getShape(), operand.getType());
+  } else {
+    new_type = UnrankedTensorType::get(operand.getType());
+  }
+
+  return AbsOp::build(builder, result, new_type, operand);
+}
+
+//===----------------------------------------------------------------------===//
+// CollectivePermuteOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(CollectivePermuteOp op) {
+  // Check that source target pair is Nx2 tensor.
+  auto type = op.source_target_pairs().getType().dyn_cast<RankedTensorType>();
+  if (type.getRank() != 2)
+    return op.emitError() << "expect source_target_pairs attribute to be of "
+                             "rank 2, but got rank "
+                          << type.getRank();
+  if (type.getShape()[1] != 2)
+    return op.emitError()
+           << "expect source_target_pairs attribute of shape (N, 2), but got ("
+           << type.getShape() << ")";
+  // Check source target pairs for duplicate sources or targets
+  absl::flat_hash_set<int64_t> sources;
+  absl::flat_hash_set<int64_t> targets;
+  for (auto i = op.source_target_pairs().begin(),
+            e = op.source_target_pairs().end();
+       i != e; ++i) {
+    auto val = (*i).getSExtValue();
+    if (i.getIndex() % 2 == 0) {
+      bool is_unique = sources.insert(val).second;
+      if (!is_unique) return op.emitError() << "duplicate sources not allowed.";
+    } else {
+      bool is_unique = targets.insert(val).second;
+      if (!is_unique) return op.emitError() << "duplicate targets not allowed.";
+    }
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// ConvertOp
+//===----------------------------------------------------------------------===//
+
+void ConvertOp::build(OpBuilder& builder, OperationState& result, Value operand,
+                      Type result_element_ty) {
+  Type result_ty;
+  Type operand_ty = operand.getType();
+  if (auto ranked_ty = operand_ty.dyn_cast<RankedTensorType>()) {
+    result_ty = RankedTensorType::get(ranked_ty.getShape(), result_element_ty);
+  } else {
+    result_ty = UnrankedTensorType::get(result_element_ty);
+  }
+  build(builder, result, result_ty, operand);
+}
+
+OpFoldResult ConvertOp::fold(ArrayRef<Attribute> operands) {
+  if (getOperand().getType() == getResult().getType()) return getOperand();
+
+  // If the result has non-static shape, a convert op is necessary to go from
+  // static shape to non-static shape.
+  if (!getResult().getType().cast<TensorType>().hasStaticShape()) return {};
+
+  // If the operand is constant, we can do the conversion now.
+  if (auto elementsAttr = operands.front().dyn_cast_or_null<ElementsAttr>()) {
+    return xla::ConvertElementsAttr(elementsAttr,
+                                    getElementTypeOrSelf(getResult()));
+  }
+
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// DequantizeOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(DequantizeOp op) {
+  auto input_type = op.input().getType().dyn_cast<ShapedType>();
+  auto output_type = op.output().getType().dyn_cast<ShapedType>();
+  if (!input_type || !output_type) {
+    return op.emitError() << "ranked input and output.";
+  }
+  auto input_shape = input_type.getShape();
+  auto output_shape = output_type.getShape().vec();
+  if (op.transpose_output()) {
+    std::reverse(output_shape.begin(), output_shape.end());
+  }
+
+  // Check the input rank and output rank are same, and also the lower
+  // dimensions are same.
+  if (input_shape.size() != output_shape.size() ||
+      !std::equal(input_shape.begin(),
+                  std::next(input_shape.begin(), input_shape.size() - 1),
+                  output_shape.begin())) {
+    return op.emitError() << "mismatched dimensions.";
+  }
+
+  // Check that the last dimension of the output is 2x or 4x of that of the
+  // input depending on the unpacked input is 16 or 8 bits.
+  int input_last_dim = *input_shape.rbegin();
+  int output_last_dim = *output_shape.rbegin();
+  int scale_factor = op.is_16bits() ? 2 : 4;
+  if (output_last_dim != scale_factor * input_last_dim) {
+    return op.emitError() << "last dimension of output should be "
+                          << scale_factor << "x of the input.";
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GetTupleElementOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(GetTupleElementOp op) {
+  auto indexVal = op.index().getZExtValue();
+  auto operandType = op.getOperand().getType().cast<TupleType>();
+  if (indexVal >= operandType.size()) {
+    return op.emitOpError(
+        llvm::formatv("index {0} is out of bounds of operand with size {1}",
+                      indexVal, operandType.size()));
+  }
+
+  auto expectedType = operandType.getType(indexVal);
+  if (op.getType() != expectedType) {
+    return op.emitOpError(llvm::formatv("has return type {0}, but expected {1}",
+                                        op.getType(), expectedType));
+  }
+  return success();
+}
+
+OpFoldResult GetTupleElementOp::fold(ArrayRef<Attribute> operands) {
+  if (auto tupleOp =
+          dyn_cast_or_null<xla_hlo::TupleOp>(getOperand().getDefiningOp())) {
+    return tupleOp.getOperand(index().getLimitedValue());
+  }
+
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// TupleOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(TupleOp op) {
+  SmallVector<Type, 8> operandTypes = {op.operand_type_begin(),
+                                       op.operand_type_end()};
+  auto expectedType = TupleType::get(operandTypes, op.getContext());
+  if (op.getType() != expectedType) {
+    return op.emitOpError(llvm::formatv("has return type {0}, but expected {1}",
+                                        op.getType(), expectedType));
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// AllToAllOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(AllToAllOp op) {
+  // If operand is ranked, size of split dimension should be a multiple of split
+  // count.
+  auto type = op.getOperand().getType().dyn_cast<RankedTensorType>();
+  if (!type) return success();
+  auto split_dim_size = type.getDimSize(op.split_dimension().getSExtValue());
+  auto split_count = op.split_count().getSExtValue();
+  if (split_dim_size % split_count != 0) {
+    return op.emitError() << "split dimension has size " << split_dim_size
+                          << ", expected to be a multiple of split_count "
+                          << split_count;
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// BroadcastOp
+//===----------------------------------------------------------------------===//
+
+// TODO(b/129012527) These should be expressed as type constraints.
+static LogicalResult Verify(BroadcastOp op) {
+  auto sizes = op.broadcast_sizes();
+  auto sizesType = sizes.getType();
+  auto sizesRank = sizesType.getRank();
+  if (sizesRank != 1) {
+    return op.emitOpError(llvm::formatv(
+        "broadcast_sizes has rank {0} instead of rank 1", sizesRank));
+  }
+
+  auto resultType = op.getResult().getType().cast<RankedTensorType>();
+  auto resultRank = resultType.getRank();
+  auto operandType = op.operand().getType().cast<RankedTensorType>();
+  auto operandRank = operandType.getRank();
+  auto sizesSize = sizesType.getNumElements();
+  auto expectedRank = operandRank + sizesSize;
+
+  if (resultRank != expectedRank) {
+    return op.emitOpError(
+        llvm::formatv("result rank ({0}) does not match operand rank "
+                      "({1}) plus size of broadcast_sizes ({2})",
+                      resultRank, operandRank, sizesSize));
+  }
+
+  llvm::SmallVector<int64_t, 10> expectedShape(sizes.getValues<int64_t>());
+
+  auto operandShape = operandType.getShape();
+  expectedShape.insert(expectedShape.end(), operandShape.begin(),
+                       operandShape.end());
+
+  auto resultShape = resultType.getShape();
+  if (resultShape != llvm::makeArrayRef(expectedShape)) {
+    return op.emitOpError(llvm::formatv(
+        "result has shape [{0}] instead of [{1}]",
+        llvm::make_range(resultShape.begin(), resultShape.end()),
+        llvm::make_range(expectedShape.begin(), expectedShape.end())));
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// BroadcastInDimOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(BroadcastInDimOp op) {
+  auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
+  auto operandRank = operandType.getRank();
+  if (!op.broadcast_dimensions()) {
+    if (operandRank == 0) {
+      return success();
+    }
+    return op.emitOpError(
+        llvm::formatv("broadcast_dimensions is absent, but required because "
+                      "operand has non-zero rank ({0})",
+                      operandRank));
+  }
+
+  auto dimensions = op.broadcast_dimensions();
+  auto dimensionsType = op.broadcast_dimensions().getType();
+  auto dimensionsRank = dimensionsType.getRank();
+  if (dimensionsRank != 1) {
+    return op.emitOpError(llvm::formatv(
+        "broadcast_dimensions has rank {0} instead of rank 1", dimensionsRank));
+  }
+
+  auto dimensionsSize = dimensionsType.getNumElements();
+  if (dimensionsSize != operandRank) {
+    return op.emitOpError(llvm::formatv(
+        "broadcast_dimensions size ({0}) does not match operand rank ({1})",
+        dimensionsSize, operandRank));
+  }
+
+  auto resultType = op.getResult().getType().cast<RankedTensorType>();
+  auto resultRank = resultType.getRank();
+  if (resultRank < operandRank) {
+    return op.emitOpError(
+        llvm::formatv("result rank ({0}) is less than operand rank ({1})",
+                      resultRank, operandRank));
+  }
+
+  for (int i = 0; i != dimensionsSize; ++i) {
+    auto dimIndex = dimensions.getValue<int64_t>(i);
+    if (dimIndex >= resultRank) {
+      return op.emitOpError(
+          llvm::formatv("broadcast_dimensions contains invalid value {0} for "
+                        "result result with rank {1}",
+                        dimIndex, resultRank));
+    }
+
+    auto dimSize = operandType.getDimSize(i);
+    auto resultDimSize = resultType.getDimSize(dimIndex);
+    if (dimSize != 1 && dimSize != resultDimSize) {
+      return op.emitOpError(
+          llvm::formatv("size of operand dimension {0} ({1}) is not equal to "
+                        "1 or size of result dimension {2} ({3})",
+                        i, dimSize, dimIndex, resultDimSize));
+    }
+  }
+
+  return success();
+}
+
+OpFoldResult BroadcastInDimOp::fold(ArrayRef<Attribute>) {
+  auto type = getType().cast<RankedTensorType>();
+  if (type != getOperand().getType()) {
+    return nullptr;
+  }
+  auto broadcast_values = broadcast_dimensions().getValues<int64_t>();
+  if (!std::equal(broadcast_values.begin(), broadcast_values.end(),
+                  llvm::seq<int64_t>(0, type.getRank()).begin())) {
+    return nullptr;
+  }
+  return getOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicBroadcastInDimOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(DynamicBroadcastInDimOp op) {
+  auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
+  auto resultType = op.getResult().getType().dyn_cast<RankedTensorType>();
+
+  // If either the operand or result are unranked, there is very little
+  // to verify statically.
+  if (!operandType || !resultType) {
+    return success();
+  }
+
+  auto outputDimensionsType =
+      op.output_dimensions().getType().cast<RankedTensorType>();
+  auto outputDimensionsSize = outputDimensionsType.getDimSize(0);
+  auto operandRank = operandType.getRank();
+  auto resultRank = resultType.getRank();
+
+  // Verify broadcast_dimensions.
+  auto bcastDimensions = op.broadcast_dimensions();
+  auto bcastDimensionsType = op.broadcast_dimensions().getType();
+  auto bcastDimensionsRank = bcastDimensionsType.getRank();
+  // TODO(laurenzo): Update the BroadcastDimAttr to constrain its rank to 1.
+  if (bcastDimensionsRank != 1) {
+    return op.emitOpError(
+        llvm::formatv("broadcast_dimensions has rank {0} instead of rank 1",
+                      bcastDimensionsRank));
+  }
+
+  auto bcastDimensionsSize = bcastDimensionsType.getNumElements();
+  if (bcastDimensionsSize != operandRank) {
+    return op.emitOpError(llvm::formatv(
+        "broadcast_dimensions size ({0}) does not match operand rank ({1})",
+        bcastDimensionsSize, operandRank));
+  }
+
+  if (resultRank < operandRank) {
+    return op.emitOpError(
+        llvm::formatv("result rank ({0}) is less than operand rank ({1})",
+                      resultRank, operandRank));
+  }
+
+  for (int i = 0; i != bcastDimensionsSize; ++i) {
+    auto dimIndex = bcastDimensions.getValue<int64_t>(i);
+    if (dimIndex >= resultRank) {
+      return op.emitOpError(
+          llvm::formatv("broadcast_dimensions contains invalid value {0} for "
+                        "result result with rank {1}",
+                        dimIndex, resultRank));
+    }
+
+    auto dimSize = operandType.getDimSize(i);
+    auto resultDimSize = resultType.getDimSize(dimIndex);
+    if (dimSize != 1 && dimSize != resultDimSize) {
+      return op.emitOpError(
+          llvm::formatv("size of operand dimension {0} ({1}) is not equal to "
+                        "1 or size of result dimension {2} ({3})",
+                        i, dimSize, dimIndex, resultDimSize));
+    }
+  }
+
+  if (outputDimensionsSize != resultRank) {
+    return op.emitOpError(
+        llvm::formatv("result rank ({0}) is not equal to number of output "
+                      "dimensions ({1})",
+                      resultRank, outputDimensionsSize));
+  }
+
+  return success();
+}
+
+// If a DynamicBroadCastInDimOp is not actually dynamic, use an ordinary
+// BroadcastInDimOp.
+class DynamicBroadcastInDimOpNotActuallyDynamic
+    : public OpRewritePattern<DynamicBroadcastInDimOp> {
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(DynamicBroadcastInDimOp op,
+                                PatternRewriter& rewriter) const override {
+    auto type = op.getType().dyn_cast<RankedTensorType>();
+    if (!type || !type.hasStaticShape()) {
+      return rewriter.notifyMatchFailure(op, "requires static shape");
+    }
+    rewriter.replaceOpWithNewOp<BroadcastInDimOp>(
+        op, op.getType(), op.operand(), op.broadcast_dimensions());
+    return success();
+  }
+};
+
+void DynamicBroadcastInDimOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<DynamicBroadcastInDimOpNotActuallyDynamic>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// ClampOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(ClampOp op) {
+  auto operandType = op.operand().getType().cast<RankedTensorType>();
+  auto operandShape = operandType.getShape();
+  auto minType = op.min().getType().cast<RankedTensorType>();
+
+  auto minShape = minType.getShape();
+  if (minShape != operandShape && minType.getRank() != 0) {
+    return op.emitOpError(llvm::formatv(
+        "min shape [{0}] is not scalar and does not match operand shape [{1}]",
+        llvm::make_range(minShape.begin(), minShape.end()),
+        llvm::make_range(operandShape.begin(), operandShape.end())));
+  }
+
+  auto maxType = op.max().getType().cast<RankedTensorType>();
+  auto maxShape = maxType.getShape();
+  if (maxShape != operandShape && maxType.getRank() != 0) {
+    return op.emitOpError(llvm::formatv(
+        "max shape [{0}] is not scalar and does not match operand shape [{1}]",
+        llvm::make_range(maxShape.begin(), maxShape.end()),
+        llvm::make_range(operandShape.begin(), operandShape.end())));
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// ComplexOp
+//===----------------------------------------------------------------------===//
+
+void ComplexOp::build(OpBuilder& builder, OperationState& state, Value lhs,
+                      Value rhs) {
+  auto type = lhs.getType();
+  auto element_ty = ComplexType::get(getElementTypeOrSelf(type));
+  Type result_ty;
+  if (auto ranked_type = type.dyn_cast<RankedTensorType>()) {
+    result_ty = RankedTensorType::get(ranked_type.getShape(), element_ty);
+  } else if (type.isa<UnrankedTensorType>()) {
+    result_ty = UnrankedTensorType::get(element_ty);
+  } else {
+    result_ty = element_ty;
+  }
+
+  build(builder, state, result_ty, lhs, rhs);
+}
+
+OpFoldResult ComplexOp::fold(ArrayRef<Attribute> operands) {
+  auto real_op =
+      dyn_cast_or_null<xla_hlo::RealOp>(getOperand(0).getDefiningOp());
+  auto imag_op =
+      dyn_cast_or_null<xla_hlo::ImagOp>(getOperand(1).getDefiningOp());
+  if (real_op && imag_op && real_op.getOperand() == imag_op.getOperand()) {
+    return real_op.getOperand();
+  }
+
+  return {};
+}
+
+namespace {
+Type CreateRealType(Type type) {
+  auto element_ty = getElementTypeOrSelf(type);
+  if (auto complex_ty = element_ty.dyn_cast<ComplexType>()) {
+    element_ty = complex_ty.getElementType();
+  }
+
+  if (auto ranked_type = type.dyn_cast<RankedTensorType>()) {
+    return RankedTensorType::get(ranked_type.getShape(), element_ty);
+  } else if (type.dyn_cast<UnrankedTensorType>()) {
+    return UnrankedTensorType::get(element_ty);
+  }
+
+  return element_ty;
+}
+}  // namespace
+
+void ImagOp::build(OpBuilder& builder, OperationState& state, Value val) {
+  build(builder, state, CreateRealType(val.getType()), val);
+}
+
+OpFoldResult ImagOp::fold(ArrayRef<Attribute> operands) {
+  if (auto complex_op =
+          dyn_cast_or_null<xla_hlo::ComplexOp>(getOperand().getDefiningOp())) {
+    return complex_op.getOperand(1);
+  }
+
+  return {};
+}
+
+void RealOp::build(OpBuilder& builder, OperationState& state, Value val) {
+  build(builder, state, CreateRealType(val.getType()), val);
+}
+
+OpFoldResult RealOp::fold(ArrayRef<Attribute> operands) {
+  if (auto complex_op =
+          dyn_cast_or_null<xla_hlo::ComplexOp>(getOperand().getDefiningOp())) {
+    return complex_op.getOperand(0);
+  }
+
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// ConcatenateOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ConcatenateOperandRemoval : public OpRewritePattern<ConcatenateOp> {
+ public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(ConcatenateOp op,
+                                PatternRewriter& rewriter) const override {
+    auto axis = op.dimension().getLimitedValue();
+    llvm::SmallVector<Value, 6> new_operands;
+    for (auto operand : op.getOperands()) {
+      auto ty = operand.getType().cast<ShapedType>();
+      if (ty.getDimSize(axis) != 0) {
+        new_operands.push_back(operand);
+      }
+    }
+
+    if (!new_operands.empty() && new_operands.size() < op.getNumOperands()) {
+      rewriter.replaceOpWithNewOp<ConcatenateOp>(op, op.getResult().getType(),
+                                                 new_operands, op.dimension());
+      return success();
+    }
+
+    return failure();
+  }
+};
+}  // namespace
+
+LogicalResult ConcatenateOp::inferReturnTypes(
+    MLIRContext*, Optional<Location> location, ValueRange operands,
+    DictionaryAttr attributes, RegionRange regions,
+    SmallVectorImpl<Type>& inferredReturnTypes) {
+  if (operands.empty()) {
+    return failure();
+  }
+
+  auto dimension_attr = attributes.get("dimension").cast<IntegerAttr>();
+  auto dimension = dimension_attr.getInt();
+
+  auto first_type = (*operands.begin()).getType().cast<ShapedType>();
+  auto out_element = first_type.getElementType();
+
+  for (auto operand : operands.getTypes()) {
+    auto element_type = getElementTypeOrSelf(operand);
+    if (element_type != out_element) {
+      return failure();
+    }
+  }
+
+  // If an input is unranked the output shape is unranked.
+  if (!first_type.hasRank()) {
+    inferredReturnTypes.push_back(UnrankedTensorType::get(out_element));
+    return success();
+  }
+
+  auto out_shape = llvm::to_vector<6>(first_type.getShape());
+  out_shape[dimension] = 0;
+
+  for (auto operand : operands.getTypes()) {
+    auto type = operand.cast<ShapedType>();
+    if (!type.hasRank()) {
+      inferredReturnTypes.push_back(UnrankedTensorType::get(out_element));
+      return success();
+    }
+
+    // If the dimension is dynamic we know the output dimension is dynamic.
+    auto dim = type.getShape()[dimension];
+    if (dim == -1) {
+      out_shape[dimension] = -1;
+      break;
+    }
+
+    out_shape[dimension] += dim;
+  }
+
+  inferredReturnTypes.push_back(RankedTensorType::get(out_shape, out_element));
+
+  return success();
+}
+
+void ConcatenateOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<ConcatenateOperandRemoval>(context);
+}
+
+template <typename T>
+static Attribute foldConcatenateHelper(ConcatenateOp* op,
+                                       ArrayRef<Attribute> operands) {
+  auto axis = op->dimension().getLimitedValue();
+  auto type = op->getType().cast<ShapedType>();
+
+  SmallVector<T, 6> values;
+  auto shape = type.getShape();
+
+  size_t top_size = 1;
+  for (int i = 0, e = axis; i < e; i++) {
+    top_size = top_size * shape[i];
+  }
+
+  for (size_t i = 0; i < top_size; i++) {
+    for (auto operand : operands) {
+      DenseElementsAttr attr = operand.cast<DenseElementsAttr>();
+      size_t bottom_size = attr.getNumElements() / top_size;
+      auto iter = attr.getValues<T>().begin() + i * bottom_size;
+      values.append(iter, iter + bottom_size);
+    }
+  }
+
+  return DenseElementsAttr::get(type, values);
+}
+
+static Attribute foldConcatenate(ConcatenateOp* op,
+                                 ArrayRef<Attribute> operands) {
+  for (auto operand : operands) {
+    if (!operand) return {};
+  }
+
+  auto type = op->getResult().getType().cast<ShapedType>();
+  auto etype = type.getElementType();
+  if (etype.isa<IntegerType>()) {
+    return foldConcatenateHelper<APInt>(op, operands);
+  }
+
+  if (etype.isa<FloatType>()) {
+    return foldConcatenateHelper<APFloat>(op, operands);
+  }
+
+  return {};
+}
+
+OpFoldResult ConcatenateOp::fold(ArrayRef<Attribute> operands) {
+  if (getNumOperands() == 1) return getOperand(0);
+
+  ShapedType type = getResult().getType().cast<ShapedType>();
+  if (!type.hasStaticShape()) return {};
+
+  auto axis = dimension().getLimitedValue();
+  if (auto attr = foldConcatenate(this, operands)) {
+    return attr;
+  }
+
+  llvm::SmallVector<Value, 6> new_operands;
+  for (auto operand : getOperands()) {
+    auto ty = operand.getType().cast<ShapedType>();
+    if (ty.getDimSize(axis) != 0) {
+      return {};
+    }
+  }
+
+  return DenseElementsAttr::get(type, ArrayRef<Attribute>());
+}
+
+static LogicalResult Verify(ConcatenateOp op) {
+  Type element_type = getElementTypeOrSelf(op.getOperand(0).getType());
+  RankedTensorType first_ranked_type;
+  int num_operands = op.getNumOperands();
+  for (int i = 0; i < num_operands; i++) {
+    auto second_type = op.getOperand(i).getType().dyn_cast<ShapedType>();
+    if (second_type.getElementType() != element_type) {
+      return op.emitOpError(
+          llvm::formatv("operands (0) and ({0}) do not match element type", i));
+    }
+
+    if (!second_type.hasRank()) {
+      continue;
+    }
+
+    if (!first_ranked_type) {
+      first_ranked_type = second_type.cast<RankedTensorType>();
+      continue;
+    }
+
+    if (first_ranked_type.getRank() != second_type.getRank()) {
+      return op.emitOpError(
+          llvm::formatv("operands (0) and ({0}) do not match rank", i));
+    }
+
+    auto first_shape = second_type.getShape();
+    auto second_shape = second_type.getShape();
+    for (int d = 0; d < first_ranked_type.getRank(); ++d) {
+      if (first_shape[d] != second_shape[d] && d != op.dimension()) {
+        return op.emitOpError(llvm::formatv(
+            "operands (0) and ({0}) non-concat dimensions do not match "
+            "({1}) != ({2})",
+            i, llvm::make_range(first_shape.begin(), first_shape.end()),
+            llvm::make_range(second_shape.begin(), second_shape.end())));
+      }
+    }
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicReshapeOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(DynamicReshapeOp op) {
+  auto result_type = op.result().getType().dyn_cast<RankedTensorType>();
+  auto output_shape_type =
+      op.output_shape().getType().dyn_cast<RankedTensorType>();
+  if (result_type && output_shape_type && output_shape_type.hasStaticShape() &&
+      output_shape_type.getDimSize(0) != result_type.getRank()) {
+    return op.emitError() << "output should have a rank equal to the number of "
+                             "elements in output_shape";
+  }
+  return success();
+}
+
+namespace {
+class DynamicReshapeOpNotActuallyDynamic
+    : public OpRewritePattern<DynamicReshapeOp> {
+ public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(DynamicReshapeOp op,
+                                PatternRewriter& rewriter) const override {
+    auto type = op.result().getType().dyn_cast<RankedTensorType>();
+    if (!type || !type.hasStaticShape()) {
+      return rewriter.notifyMatchFailure(op, "requires static shape tensor");
+    }
+    rewriter.replaceOpWithNewOp<ReshapeOp>(op, op.getType(), op.operand());
+    return success();
+  }
+};
+}  // namespace
+
+void DynamicReshapeOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<DynamicReshapeOpNotActuallyDynamic>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicSliceOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Canonicalizes DynamicSlice ops that can be replaced instead with Slice ops.
+// This canonicalization is applied the case when the `begin` input values are
+// compile time constants and thus can be made into a tensor.
+struct DynamicSliceToSlice : public OpRewritePattern<DynamicSliceOp> {
+  using OpRewritePattern<DynamicSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(DynamicSliceOp dynamic_slice,
+                                PatternRewriter& rewriter) const override {
+    Value input = dynamic_slice.operand();
+    auto input_tensor = input.getType().dyn_cast<RankedTensorType>();
+    if (!input_tensor) return failure();
+
+    SmallVector<int64_t, 4> temp_start_indices;
+    for (Value start : dynamic_slice.start_indices()) {
+      APInt val;
+      if (!matchPattern(start, m_ConstantInt(&val))) {
+        return failure();
+      }
+      temp_start_indices.push_back(*(val.getRawData()));
+    }
+
+    // At this point we've determined that the start indices are all constants;
+    // pack them into a single tensor.
+    auto loc = dynamic_slice.getLoc();
+    int64_t input_rank = input_tensor.getRank();
+    auto slice_start_indices =
+        GetI64ElementsAttr(temp_start_indices, &rewriter);
+    DenseIntElementsAttr slice_limits = BuildSliceLimits(
+        slice_start_indices, dynamic_slice.slice_sizes(), &rewriter);
+    DenseIntElementsAttr slice_strides =
+        GetI64ElementsAttr(SmallVector<int64_t, 4>(input_rank, 1), &rewriter);
+    auto result = rewriter.create<SliceOp>(loc, input, slice_start_indices,
+                                           slice_limits, slice_strides);
+    rewriter.replaceOp(dynamic_slice, {result});
+    return success();
+  }
+};
+
+}  // namespace
+
+void DynamicSliceOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<DynamicSliceToSlice>(context);
+}
+
+// Verifies that the number of slice sizes and the number of start indices match
+static LogicalResult Verify(DynamicSliceOp op) {
+  int num_slice_sizes = op.slice_sizes().getNumElements();
+  int num_start_indices = op.start_indices().size();
+  if (num_start_indices != num_slice_sizes) {
+    return op.emitOpError()
+           << "has mismatched number of slice sizes (" << num_slice_sizes
+           << ") and number of start indices (" << num_start_indices << ")";
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// InfeedOp
+//===----------------------------------------------------------------------===//
+
+// Checks that the result type is of the form `tuple< any_type, token >`.
+static LogicalResult Verify(InfeedOp op) {
+  auto result_ty = op.getResult().getType().cast<TupleType>();
+  auto subtypes = result_ty.getTypes();
+  if (subtypes.size() != 2)
+    return op.emitOpError()
+           << "result is expected to be a tuple of size 2, but got "
+           << subtypes.size();
+  if (!subtypes[1].isa<TokenType>())
+    return op.emitOpError() << "second element of result tuple is expected to "
+                               "be of token type, but got "
+                            << subtypes[1];
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// MapOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(MapOp op) {
+  // Checks if the number of `operands` match the arity of the map `computation`
+  // region.
+  auto& computation_block = op.computation().front();
+  auto computation_args = computation_block.getArguments();
+  if (op.operands().size() != computation_args.size())
+    return op.emitOpError()
+           << "expects number of operands to match the arity "
+              "of map computation, but got: "
+           << op.operands().size() << " and " << computation_args.size();
+
+  // The parameters of computation should all be scalars and match the element
+  // type of operands.
+  auto operand_type = op.operands()[0].getType().cast<TensorType>();
+  auto operand_elem_ty = operand_type.getElementType();
+
+  for (auto indexed_arg : llvm::enumerate(computation_args)) {
+    auto arg_type = indexed_arg.value().getType().dyn_cast<TensorType>();
+    if (!arg_type || arg_type.getRank() != 0)
+      return op.emitOpError()
+             << "computation arguments must be 0-rank tensor, but got: arg #"
+             << indexed_arg.index() << " of type "
+             << indexed_arg.value().getType();
+    if (arg_type.getElementType() != operand_elem_ty) {
+      return op.emitOpError()
+             << "element type of operands and computation arguments must "
+                "match, but got: "
+             << operand_elem_ty << " and " << arg_type.getElementType();
+    }
+  }
+
+  // Mapped computation must return single output
+  auto computation_outputs = computation_block.getTerminator()->getOperands();
+  if (computation_outputs.size() != 1)
+    return op.emitOpError()
+           << "computation must return single output, but got: "
+           << computation_outputs.size();
+
+  // The output of computation must be scalar and have the same element type
+  // as op result.
+  auto computation_output_type =
+      computation_outputs[0].getType().dyn_cast<TensorType>();
+  if (!computation_output_type || computation_output_type.getRank() != 0)
+    return op.emitOpError()
+           << "computation must return 0-rank tensor, but got: "
+           << computation_outputs[0].getType();
+
+  auto result_type = op.getType().cast<TensorType>();
+  if (computation_output_type.getElementType() != result_type.getElementType())
+    return op.emitOpError() << "element type of result and computation output "
+                               "must match, but got: "
+                            << result_type.getElementType() << " and "
+                            << computation_output_type.getElementType();
+
+  // Checks that the requested map dimension numbers are monotonically
+  // increasing.
+  auto values = op.dimensions().getValues<int64_t>();
+  auto dimensions = std::vector<int64_t>{values.begin(), values.end()};
+  for (int i = 0, e = dimensions.size(); i < e; ++i) {
+    if (dimensions[i] != i)
+      return op.emitOpError() << "requires monotonically increasing dimension "
+                                 "numbers, but got: "
+                              << op.dimensions();
+  }
+
+  // Checks that number of dimensions of operands matches the size of
+  // `dimensions` since we currently only support mapping across all
+  // dimensions: i.e., scalar map functions.
+  if (operand_type.hasRank()) {
+    if (dimensions.size() != operand_type.getShape().size())
+      return op.emitOpError()
+             << "applied to a subset of dimensions currently not supported: "
+                "operand dimensions = "
+             << operand_type.getShape().size()
+             << ", requested map dimensions size = " << dimensions.size();
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// RecvOp
+//===----------------------------------------------------------------------===//
+
+// Checks that the result type is of the form `tuple<any_type, xla_hlo::token>`
+static LogicalResult Verify(RecvOp op) {
+  auto result_ty = op.getResult().getType().cast<TupleType>();
+  auto subtypes = result_ty.getTypes();
+  if (subtypes.size() != 2)
+    return op.emitOpError()
+           << "result is expected to be a tuple of size 2, but got "
+           << subtypes.size();
+  if (!subtypes[1].isa<TokenType>())
+    return op.emitOpError() << "second element of result tuple is expected to "
+                               "be of token type, but got "
+                            << subtypes[1];
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// CopyOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult CopyOp::fold(ArrayRef<Attribute> operands) { return getOperand(); }
+
+//===----------------------------------------------------------------------===//
+// ReverseOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult ReverseOp::fold(ArrayRef<Attribute> operands) {
+  auto input = operand();
+
+  // No dimensions to reverse.
+  if (dimensions().getNumElements() == 0) return input;
+
+  llvm::SmallVector<APInt, 5> new_dims;
+  new_dims.reserve(dimensions().getNumElements());
+
+  auto shaped_type = input.getType().cast<ShapedType>();
+  for (auto dim : dimensions().getValues<APInt>()) {
+    if (shaped_type.getDimSize(dim.getLimitedValue()) != 1) {
+      return nullptr;
+    }
+  }
+
+  return input;
+}
+
+//===----------------------------------------------------------------------===//
+// ReduceOp
+//===----------------------------------------------------------------------===//
+
+// Returns the result type after reducing operand of the given type across the
+// specified dimensions.
+static TensorType GetReduceResultType(Type operand_ty,
+                                      DenseIntElementsAttr dimensions,
+                                      Builder* builder) {
+  Type element_ty = getElementTypeOrSelf(operand_ty);
+
+  auto ranked_ty = operand_ty.dyn_cast<RankedTensorType>();
+  if (!ranked_ty) return UnrankedTensorType::get(element_ty);
+
+  int64_t rank = ranked_ty.getRank();
+  llvm::SmallVector<bool, 4> dims_mask(rank, false);
+  for (int64_t dim : dimensions.getValues<int64_t>()) dims_mask[dim] = true;
+
+  SmallVector<int64_t, 4> shape;
+  for (int64_t i = 0; i < rank; ++i) {
+    if (!dims_mask[i]) shape.push_back(ranked_ty.getDimSize(i));
+  }
+
+  return RankedTensorType::get(shape, element_ty);
+}
+
+void ReduceOp::build(OpBuilder& builder, OperationState& state,
+                     ValueRange operands, ValueRange init_values,
+                     DenseIntElementsAttr dimensions) {
+  SmallVector<Type, 1> result_ty;
+  result_ty.reserve(operands.size());
+
+  for (Value operand : operands) {
+    result_ty.push_back(
+        GetReduceResultType(operand.getType(), dimensions, &builder));
+  }
+  build(builder, state, result_ty, operands, init_values, dimensions);
+}
+
+LogicalResult ReduceOp::fold(ArrayRef<Attribute> operands,
+                             SmallVectorImpl<OpFoldResult>& results) {
+  // No dimensions to reduce.
+  if (dimensions().getNumElements() == 0) {
+    for (Value input : this->operands()) {
+      results.push_back(input);
+    }
+    return success();
+  }
+  return failure();
+}
+
+//===----------------------------------------------------------------------===//
+// SelectOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(SelectOp op) {
+  // TODO(jpienaar): Update to allow broadcastable and unranked inputs. This
+  // corresponds to the client side HLO.
+  return success();
+}
+
+// Makes it such that a SelectOp that is a non-root operation in a DRR infers
+// the return type based on operand type.
+LogicalResult SelectOp::inferReturnTypes(
+    MLIRContext*, Optional<Location> location, ValueRange operands,
+    DictionaryAttr attributes, RegionRange regions,
+    SmallVectorImpl<Type>& inferredReturnTypes) {
+  auto x_type = operands[1].getType();
+  auto y_type = operands[2].getType();
+  auto x_tensor = x_type.cast<TensorType>();
+  auto y_tensor = y_type.cast<TensorType>();
+
+  // Check for type compatibility in the select op. This requires that the two
+  // non-predicate operands:
+  //   (a) have the same element type
+  //   (b) have compatible shapes (i.e. the same shape and/or at least one
+  //       dynamic shape)
+  if (x_tensor.getElementType() != y_tensor.getElementType() ||
+      failed(mlir::verifyCompatibleShape(x_type, y_type))) {
+    return emitOptionalError(location, "incompatible operand types: ", x_type,
+                             " and ", y_type);
+  }
+
+  // TODO(lucyfox): Support output shape inference when operands have compatible
+  // shapes. (The output shape should be the most general of the operand shapes
+  // at each dimension.) For now, handle the straightforward cases and fail
+  // otherwise. When this is fully implemented, this logic should move into
+  // reusable functionality in MLIR Core.
+  Type output_type;
+  if (x_type == y_type || !x_tensor.hasRank()) {
+    output_type = x_type;
+  } else if (!y_tensor.hasRank()) {
+    output_type = y_type;
+  } else {
+    return emitOptionalError(location,
+                             "currently unsupported operand types: ", x_type,
+                             " and ", y_type);
+  }
+  inferredReturnTypes.assign({output_type});
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(PadOp op) {
+  auto input_type = op.operand().getType().cast<RankedTensorType>();
+  auto pad_type = op.padding_value().getType().cast<RankedTensorType>();
+
+  if (pad_type.getRank() != 0) {
+    return op.emitOpError(
+        llvm::formatv("padding value type should be a rank-0 "
+                      "tensor, is rank {0}",
+                      pad_type.getRank()));
+  }
+
+  const auto& padding_low = op.edge_padding_low();
+  if (padding_low.getType().getNumElements() != input_type.getRank()) {
+    return op.emitOpError(llvm::formatv(
+        "edge_padding_low length ({0}) must match operand rank ({1})",
+        padding_low.getType().getNumElements(), input_type.getRank()));
+  }
+
+  const auto& padding_high = op.edge_padding_high();
+  if (padding_high.getType().getNumElements() != input_type.getRank()) {
+    return op.emitOpError(llvm::formatv(
+        "edge_padding_high length ({0}) must match operand rank ({1})",
+        padding_high.getType().getNumElements(), input_type.getRank()));
+  }
+
+  const auto& padding_interior = op.interior_padding();
+  if (padding_interior.getType().getNumElements() != input_type.getRank()) {
+    return op.emitOpError(llvm::formatv(
+        "interior_padding length ({0}) must match operand rank ({1})",
+        padding_interior.getType().getNumElements(), input_type.getRank()));
+  }
+
+  auto input_shape = input_type.getShape();
+  auto output_shape =
+      op.getResult().getType().cast<RankedTensorType>().getShape();
+  if (input_shape.size() != output_shape.size()) {
+    return op.emitOpError(
+        llvm::formatv("operand rank ({0}) and result rank({0}) should match",
+                      input_shape.size(), output_shape.size()));
+  }
+
+  for (int i = 0, e = input_shape.size(); i < e; i++) {
+    int padding_low_val = padding_low.getValue<IntegerAttr>(i).getInt();
+    int padding_high_val = padding_high.getValue<IntegerAttr>(i).getInt();
+    int padding_interior_val =
+        padding_interior.getValue<IntegerAttr>(i).getInt();
+    int expected_output =
+        input_shape[i] + padding_low_val + padding_high_val +
+        std::max<int64_t>(input_shape[i] - 1, 0LL) * padding_interior_val;
+    if (expected_output != output_shape[i]) {
+      return op.emitOpError(llvm::formatv(
+          "expected output shape's dimension #{0} to be {1} but found {2}", i,
+          expected_output, output_shape[i]));
+    }
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// ReshapeOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(ReshapeOp op) {
+  // If the operand type is dynamically shaped there is nothing to verify.
+  auto operand_ty = op.operand().getType().cast<RankedTensorType>();
+  if (!operand_ty || !operand_ty.hasStaticShape()) return success();
+
+  // If the operand type is statically shaped (not required) the number of
+  // elements must match that of the result type.
+  auto result_ty = op.getType().cast<RankedTensorType>();
+  assert(result_ty && result_ty.hasStaticShape() &&
+         "result type must be statically shaped");
+  int64_t num_result_elements = result_ty.getNumElements();
+  int64_t num_operand_elements = operand_ty.getNumElements();
+  if (num_result_elements != num_operand_elements)
+    return op.emitOpError()
+           << "number of output elements (" << num_result_elements
+           << ") doesn't match expected number of elements ("
+           << num_operand_elements << ")";
+
+  return success();
+}
+
+OpFoldResult ReshapeOp::fold(ArrayRef<Attribute> operands) {
+  if (getOperand().getType() == getType()) {
+    return getOperand();
+  }
+
+  if (auto prev_op =
+          dyn_cast_or_null<ReshapeOp>(getOperand().getDefiningOp())) {
+    setOperand(prev_op.getOperand());
+    return getResult();
+  }
+
+  if (auto elements = operands.front().dyn_cast_or_null<DenseElementsAttr>()) {
+    return elements.reshape(getResult().getType().cast<ShapedType>());
+  }
+
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// Case Op
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(CaseOp op) {
+  auto num_branches = op.branches().size();
+  if (op.branch_operands().size() != num_branches)
+    return op.emitOpError() << "expects number of branches " << num_branches
+                            << " to be same as number of branch operands "
+                            << op.branch_operands().size();
+
+  MutableArrayRef<Region> branches = op.branches();
+  OperandRange branch_operands = op.branch_operands();
+  for (unsigned i = 0; i < num_branches; ++i) {
+    mlir::Region& branch_region = branches[i];
+    if (branch_region.empty())
+      return op.emitOpError() << "cannot have empty regions";
+    mlir::Block& entry_block = branch_region.front();
+    if (entry_block.getNumArguments() != 1)
+      return op.emitOpError()
+             << "expects branch regions to have single argument, but found "
+             << entry_block.getNumArguments() << " for branch " << i;
+    auto operand = branch_operands[i];
+    if (entry_block.getArgument(0).getType() != operand.getType())
+      return op.emitOpError()
+             << "expects operand " << i + 1 << " to be of type "
+             << entry_block.getArgument(0).getType() << ", but found "
+             << operand.getType();
+    WalkResult walker = branch_region.walk([&](ReturnOp return_op) {
+      if (return_op.getOperands().getTypes() != op.getResultTypes())
+        return WalkResult::interrupt();
+      return WalkResult::advance();
+    });
+    if (walker.wasInterrupted())
+      return op.emitOpError()
+             << "branch " << i
+             << " returned values do not match op result types";
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// BinaryOps
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// Updates the element type of a (presumed) tensor type 'x', returning either
+// a permuted UnrankedTensorType or RankedTensorType.
+static Type UpdateResultElementType(Builder* builder, Type x,
+                                    Type element_type) {
+  auto x_ranked = x.dyn_cast<RankedTensorType>();
+  if (!x_ranked) {
+    return UnrankedTensorType::get(element_type);
+  }
+
+  auto shape_x = x_ranked.getShape();
+  return RankedTensorType::get(shape_x, element_type);
+}
+}  // namespace
+
+template <typename Op, typename ElementType = Type, typename ValType,
+          typename Convert>
+static Attribute BinaryFolder(Op* op, ArrayRef<Attribute> attrs) {
+  if (!attrs[0] || !attrs[1]) return {};
+
+  DenseElementsAttr lhs = attrs[0].dyn_cast<DenseElementsAttr>();
+  DenseElementsAttr rhs = attrs[1].dyn_cast<DenseElementsAttr>();
+  if (!lhs || !rhs) return {};
+
+  ShapedType type = op->getType().template cast<ShapedType>();
+  if (!type.hasStaticShape()) {
+    return {};
+  }
+
+  Type etype = type.getElementType();
+
+  // Evaluate for integer values.
+  if (!etype.isa<ElementType>()) {
+    return {};
+  }
+
+  SmallVector<ValType, 6> values;
+  values.reserve(lhs.getNumElements());
+  for (const auto zip :
+       llvm::zip(lhs.getValues<ValType>(), rhs.getValues<ValType>())) {
+    values.push_back(Convert()(std::get<0>(zip), std::get<1>(zip)));
+  }
+
+  return DenseElementsAttr::get(type, values);
+}
+
+template <typename T>
+struct divide : std::divides<T> {};
+
+template <>
+struct divide<APInt> {
+  APInt operator()(const APInt& a, const APInt& b) const { return a.sdiv(b); }
+};
+
+template <typename T>
+struct max {
+  T operator()(const T& a, const T& b) const { return std::max<T>(a, b); }
+};
+
+template <>
+struct max<APInt> {
+  APInt operator()(const APInt& a, const APInt& b) const {
+    return llvm::APIntOps::smax(a, b);
+  }
+};
+
+template <typename T>
+struct min {
+  T operator()(const T& a, const T& b) const { return std::min<T>(a, b); }
+};
+
+template <>
+struct min<APInt> {
+  APInt operator()(const APInt& a, const APInt& b) const {
+    return llvm::APIntOps::smin(a, b);
+  }
+};
+
+#define BINARY_FOLDER(Op, Func)                                                \
+  OpFoldResult Op::fold(ArrayRef<Attribute> attrs) {                           \
+    if (getElementTypeOrSelf(getType()).isa<FloatType>())                      \
+      return BinaryFolder<Op, FloatType, APFloat, Func<APFloat>>(this, attrs); \
+    if (getElementTypeOrSelf(getType()).isa<IntegerType>())                    \
+      return BinaryFolder<Op, IntegerType, APInt, Func<APInt>>(this, attrs);   \
+    return {};                                                                 \
+  }
+
+// Addition, subtraction and multiplication use the std:: versions of the ops.
+// Due to the other ops behaving differently in signed vs unsigned integers,
+// APInts need a special implementation. Currently, it replicates signed int
+// op behavior.
+BINARY_FOLDER(AddOp, std::plus);
+BINARY_FOLDER(SubOp, std::minus);
+BINARY_FOLDER(MulOp, std::multiplies);
+BINARY_FOLDER(DivOp, divide);
+BINARY_FOLDER(MaxOp, max);
+BINARY_FOLDER(MinOp, min);
+
+#undef BINARY_FOLDER
+
+//===----------------------------------------------------------------------===//
+// SliceOp
+//===----------------------------------------------------------------------===//
+
+void SliceOp::build(OpBuilder& builder, OperationState& result, Value operand,
+                    DenseIntElementsAttr start_indices,
+                    DenseIntElementsAttr limit_indices,
+                    DenseIntElementsAttr strides) {
+  return build(builder, result,
+               InferOutputTypes(&builder, operand, start_indices, limit_indices,
+                                strides),
+               operand, start_indices, limit_indices, strides);
+}
+
+template <typename I, typename E>
+static void SliceElements(I values, ArrayRef<int64_t> sizes,
+                          ArrayRef<int64_t> starts, ArrayRef<int64_t> limits,
+                          ArrayRef<int64_t> strides,
+                          llvm::SmallVectorImpl<E>* out_values) {
+  assert(starts.size() == limits.size());
+  assert(starts.size() == strides.size());
+  if (starts.empty()) return;
+
+  int64_t start = starts.front();
+  int64_t limit = limits.front();
+  int64_t stride = strides.front();
+  if (starts.size() == 1) {
+    for (int i = start; i < limit; i += stride) {
+      out_values->push_back(*(values + i));
+    }
+    return;
+  }
+
+  for (; start < limit; start += stride) {
+    auto begin = values + start * sizes.front();
+    SliceElements<I, E>(begin, sizes.drop_front(), starts.drop_front(),
+                        limits.drop_front(), strides.drop_front(), out_values);
+  }
+}
+
+template <typename I, typename E>
+static Attribute FoldSlice(SliceOp* op, I values) {
+  auto start = llvm::to_vector<6>(op->start_indices().getValues<int64_t>());
+  auto limit = llvm::to_vector<6>(op->limit_indices().getValues<int64_t>());
+  auto stride = llvm::to_vector<6>(op->strides().getValues<int64_t>());
+
+  auto result_type = op->operand().getType().cast<ShapedType>();
+  if (!result_type.hasStaticShape()) return {};
+
+  auto shape = result_type.getShape();
+  int64_t count = result_type.getNumElements();
+  // Compute the striding for each dimension.
+  llvm::SmallVector<int64_t, 6> sizes;
+  sizes.reserve(shape.size());
+  for (auto v : shape) {
+    count = count / v;
+    sizes.push_back(count);
+  }
+
+  llvm::SmallVector<E, 6> out_values;
+  out_values.reserve(result_type.getNumElements());
+  SliceElements<I, E>(values, sizes, start, limit, stride, &out_values);
+
+  return DenseElementsAttr::get(op->getResult().getType().cast<ShapedType>(),
+                                out_values);
+}
+
+OpFoldResult SliceOp::fold(ArrayRef<Attribute> operands) {
+  // Check if the SliceOp is a NoOp operation.
+  auto operand_shape = getOperand().getType().cast<ShapedType>().getShape();
+  auto result_type = getResult().getType().cast<ShapedType>();
+  auto result_shape = result_type.getShape();
+
+  if (result_type.hasStaticShape() && (operand_shape == result_shape)) {
+    return getOperand();
+  }
+
+  if (operands.empty() || !operands.front()) return {};
+
+  // Evaluate for statically valued inputs.
+  DenseElementsAttr elements = operands.front().dyn_cast<DenseElementsAttr>();
+  if (!elements) return {};
+
+  auto etype = elements.getType().getElementType();
+  if (etype.isa<IntegerType>()) {
+    return FoldSlice<DenseElementsAttr::IntElementIterator, APInt>(
+        this, elements.getIntValues().begin());
+  } else if (etype.isa<FloatType>()) {
+    return FoldSlice<
+        llvm::mapped_iterator<DenseElementsAttr::IntElementIterator,
+                              std::function<APFloat(const APInt&)>>,
+        APFloat>(this, elements.getFloatValues().begin());
+  }
+
+  return {};
+}
+
+namespace {
+// In cases where a concat is fed into a slice, it is possible the concat
+// can be simplified or bypassed. This checks which inputs to the concat are
+// used by the slice, either reducing the number of concatenated values or
+// entirely removes the concat.
+struct SimplifyConcatSlice : public OpRewritePattern<SliceOp> {
+  using OpRewritePattern<SliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(SliceOp slice,
+                                PatternRewriter& rewriter) const override {
+    auto result_ty = slice.getType().cast<ShapedType>();
+    if (!result_ty.hasStaticShape()) {
+      return failure();
+    }
+
+    auto slice_input = slice.operand();
+    auto slice_input_ty = slice_input.getType().cast<ShapedType>();
+    auto concat = dyn_cast_or_null<ConcatenateOp>(slice_input.getDefiningOp());
+    if (!concat) {
+      return failure();
+    }
+
+    auto dimension = concat.dimension().getSExtValue();
+
+    auto start = slice.start_indices().getIntValues();
+    auto limit = slice.limit_indices().getIntValues();
+
+    auto slice_start = (*(start.begin() + dimension)).getSExtValue();
+    auto slice_limit = (*(limit.begin() + dimension)).getSExtValue();
+
+    // We need to determine what inputs from the concat affect the slice, and
+    // how the bounds of the slice need to be updated for the minimally required
+    // inputs.
+    int64_t running_size = 0;
+    int64_t front_offset = slice_input_ty.getShape()[dimension];
+
+    auto subset_start = concat.operand_end();
+    auto subset_end = concat.operand_end();
+    for (auto it = concat.operand_begin(); it < concat.operand_end(); ++it) {
+      auto input = *it;
+      ShapedType input_ty = input.getType().cast<ShapedType>();
+      if (input_ty.isDynamicDim(dimension)) {
+        return failure();
+      }
+      auto dim_size = input_ty.getShape()[dimension];
+
+      // If this position is in the slice its the start of the subset and we
+      // need to update the start and limit values.
+      if (running_size + dim_size > slice_start &&
+          subset_start == concat.operand_end()) {
+        subset_start = it;
+        front_offset = running_size;
+      }
+
+      // Determine the last required offset.
+      if (running_size < slice_limit) {
+        subset_end = it + 1;
+      }
+
+      running_size += dim_size;
+    }
+
+    auto subset_size = subset_end - subset_start;
+    // We need all inputs so no optimization.
+    if (subset_size == concat.getNumOperands()) {
+      return failure();
+    }
+
+    if (subset_size > 1 && !concat.getResult().hasOneUse()) {
+      return failure();
+    }
+
+    auto concat_range = OperandRange(subset_start, subset_end);
+    auto new_concat = rewriter.create<ConcatenateOp>(
+        concat.getLoc(), concat_range, concat.dimension());
+
+    llvm::SmallVector<APInt, 6> new_start(start);
+    llvm::SmallVector<APInt, 6> new_limit(limit);
+    new_start[dimension] -= front_offset;
+    new_limit[dimension] -= front_offset;
+
+    auto attr_type = slice.start_indices().getType().cast<ShapedType>();
+    auto create = rewriter.create<SliceOp>(
+        slice.getLoc(), new_concat,
+        DenseIntElementsAttr::get(attr_type, new_start),
+        DenseIntElementsAttr::get(attr_type, new_limit), slice.strides());
+    rewriter.replaceOp(slice, create.getResult());
+    return success();
+  }
+};
+}  // namespace
+
+void SliceOp::getCanonicalizationPatterns(OwningRewritePatternList& results,
+                                          MLIRContext* context) {
+  results.insert<SimplifyConcatSlice>(context);
+}
+
+// Returns output dimension size for slice result for the given arguments.
+// Returns -1 if arguments are illegal.
+static int64_t InferSliceDim(int64_t input_dim, int64_t start, int64_t end,
+                             int64_t stride) {
+  if (input_dim == -1 || start < 0 || start > end || end > input_dim ||
+      stride == 0)
+    return -1;
+
+  return llvm::divideCeil(end - start, stride);
+}
+
+Type SliceOp::InferOutputTypes(Builder* builder, Value operand,
+                               DenseIntElementsAttr start_indices,
+                               DenseIntElementsAttr limit_indices,
+                               DenseIntElementsAttr strides) {
+  Type ty = operand.getType();
+  RankedTensorType ranked_ty = ty.dyn_cast<RankedTensorType>();
+  if (!ranked_ty) return ty;
+  int64_t rank = ranked_ty.getRank();
+
+  // Illegal attributes.
+  ShapedType attr_ty = start_indices.getType();
+  if (attr_ty.getRank() != 1 || attr_ty.getNumElements() != rank ||
+      !attr_ty.getElementType().isSignlessInteger(64) ||
+      limit_indices.getType() != attr_ty || strides.getType() != attr_ty)
+    return ty;
+
+  SmallVector<int64_t, 4> start(start_indices.getValues<int64_t>());
+  SmallVector<int64_t, 4> limit(limit_indices.getValues<int64_t>());
+  SmallVector<int64_t, 4> stride_vals(strides.getValues<int64_t>());
+
+  SmallVector<int64_t, 4> shape;
+  shape.reserve(rank);
+  for (int64_t i = 0, e = rank; i != e; i++) {
+    shape.push_back(InferSliceDim(ranked_ty.getDimSize(i), start[i], limit[i],
+                                  stride_vals[i]));
+  }
+  return RankedTensorType::get(shape, ranked_ty.getElementType());
+}
+
+//===----------------------------------------------------------------------===//
+// SortOp
+//===----------------------------------------------------------------------===//
+
+void SortOp::build(OpBuilder& builder, OperationState& state,
+                   ValueRange operands, int64_t dimension, bool is_stable) {
+  state.addOperands(operands);
+  state.addAttribute("dimension", builder.getI64IntegerAttr(dimension));
+  state.addAttribute("is_stable", builder.getBoolAttr(dimension));
+
+  SmallVector<Type, 2> element_types;
+  element_types.reserve(operands.size());
+  for (Value operand : operands) element_types.push_back(operand.getType());
+  state.addTypes(builder.getTupleType(element_types));
+
+  state.addRegion();
+}
+
+static LogicalResult Verify(SortOp op) {
+  Operation::operand_range operands = op.operands();
+  if (operands.empty()) return op.emitOpError("requires at least one input");
+
+  // TODO(antiagainst): verify partionally dynamic shapes
+  if (llvm::all_of(operands, [](Value operand) {
+        return operand.getType().cast<ShapedType>().hasRank();
+      })) {
+    ArrayRef<int64_t> input_shape =
+        (*operands.begin()).getType().cast<ShapedType>().getShape();
+
+    if (llvm::any_of(llvm::drop_begin(operands, 1), [&](Value operand) {
+          return operand.getType().cast<ShapedType>().getShape() != input_shape;
+        }))
+      return op.emitOpError("requires all inputs to have the same dimensions");
+
+    int64_t rank = input_shape.size();
+    int64_t cmp_dim = op.dimension().getSExtValue();
+    if (cmp_dim < -rank || cmp_dim >= rank)
+      return op.emitOpError("dimension attribute value must be in range [-")
+             << rank << ", " << rank << "), but found " << cmp_dim;
+  }
+
+  Block& block = op.comparator().front();
+  size_t num_operands = op.getOperation()->getNumOperands();
+  if (block.getNumArguments() != 2 * num_operands)
+    return op.emitOpError("comparator block should have ")
+           << 2 * num_operands << " arguments";
+
+  for (auto indexed_operand : llvm::enumerate(operands)) {
+    int index = indexed_operand.index();
+    Type element_type =
+        indexed_operand.value().getType().cast<ShapedType>().getElementType();
+    Type tensor_type = RankedTensorType::get({}, element_type);
+    for (int i : {2 * index, 2 * index + 1}) {
+      Type arg_type = block.getArgument(i).getType();
+      if (arg_type != tensor_type)
+        return op.emitOpError("comparator block argument #")
+               << i << " should be of type " << tensor_type << " but got "
+               << arg_type;
+    }
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// TransposeOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult TransposeOp::fold(ArrayRef<Attribute> operands) {
+  for (auto it : llvm::enumerate(permutation().getValues<APInt>())) {
+    if (it.index() != it.value()) {
+      return {};
+    }
+  }
+  return getOperand();
+}
+
+static LogicalResult Verify(TransposeOp op) {
+  // permutation is an attribute of the op so it has static shape.
+  auto permutationType = op.permutation().getType();
+  auto permutationRank = permutationType.getRank();
+  if (permutationRank != 1) {
+    return op.emitOpError(llvm::formatv(
+        "permutation has rank {0} instead of rank 1", permutationRank));
+  }
+  auto permutationSize = permutationType.getNumElements();
+
+  auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
+  if (operandType) {
+    auto operandRank = operandType.getRank();
+    if (operandRank != permutationSize) {
+      return op.emitOpError(llvm::formatv(
+          "operand rank ({0}) does not match permutation size ({1})",
+          operandRank, permutationSize));
+    }
+  }
+
+  auto resultType = op.getResult().getType().dyn_cast<RankedTensorType>();
+  if (resultType) {
+    auto resultRank = resultType.getRank();
+    if (resultRank != permutationSize) {
+      return op.emitOpError(llvm::formatv(
+          "result rank ({0}) does not match permutation size ({1})", resultRank,
+          permutationSize));
+    }
+  }
+
+  if (!resultType || !operandType) return success();
+
+  auto operandRank = operandType.getRank();
+  SmallVector<int64_t, 4> expectedShape(operandRank);
+  for (int i = 0; i != operandRank; ++i) {
+    auto permutedDim = op.permutation().getValue<IntegerAttr>(i).getInt();
+    expectedShape[i] = operandType.getDimSize(permutedDim);
+  }
+
+  auto expectedType =
+      RankedTensorType::get(expectedShape, resultType.getElementType());
+  if (failed(verifyCompatibleShape(resultType, expectedType))) {
+    return op.emitOpError(llvm::formatv(
+        "result type {0} is incompatible with the expected type {1}",
+        resultType, expectedType));
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// TriangularSolveOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(TriangularSolveOp op) {
+  auto a_type = op.a().getType().dyn_cast<RankedTensorType>();
+
+  // Skip verifier if a is unranked tensor.
+  if (!a_type) return success();
+
+  // Check that a should have rank >= 2
+  auto a_rank = a_type.getRank();
+  if (a_rank < 2)
+    return op.emitOpError()
+           << "operand 'a' must have rank >= 2, but got " << a_type;
+
+  // The two minor dimensions of a must have same size.
+  if (a_type.getDimSize(a_rank - 2) != a_type.getDimSize(a_rank - 1))
+    return op.emitOpError() << "two minor dimensions of operand 'a' must have "
+                               "equal size, but got "
+                            << a_type;
+
+  auto b_type = op.b().getType().dyn_cast<RankedTensorType>();
+  // If b is unranked skip remaining checks.
+  if (!b_type) return success();
+
+  // Check that a and b have same rank.
+  auto b_rank = b_type.getRank();
+  if (a_rank != b_rank)
+    return op.emitOpError() << "operands must have equal rank, but got "
+                            << a_type << " and " << b_type;
+
+  // The shared dimension of a and b should match.
+  if (a_type.getDimSize(a_rank - 1) !=
+      b_type.getDimSize(b_rank - (op.left_side() ? 2 : 1)))
+    return op.emitOpError() << "shared dimension of operands 'a' and 'b' does "
+                               "not match, but got "
+                            << a_type << " and " << b_type;
+
+  // The leading batch dimensions of a and b must be equal.
+  auto a_batch_dims = a_type.getShape().drop_back(2);
+  auto b_batch_dims = b_type.getShape().drop_back(2);
+  if (a_batch_dims != b_batch_dims)
+    return op.emitOpError()
+           << "leading batch dimensions of the operands must be same, but got "
+           << a_type << " and " << b_type;
+
+  // Result and argument b must have same shape.
+  auto result_type = op.getType().dyn_cast<RankedTensorType>();
+  if (!result_type) return success();
+  if (result_type != b_type)
+    return op.emitOpError()
+           << "result and operand 'b' must have same shape, but got "
+           << result_type << " and " << b_type;
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GetTupleElementOp
+//===----------------------------------------------------------------------===//
+
+void GetTupleElementOp::build(OpBuilder& builder, OperationState& result,
+                              Value tuple, int32_t index) {
+  if (auto tuple_type = tuple.getType().dyn_cast<TupleType>()) {
+    auto element_type = tuple_type.getType(index);
+    build(builder, result, element_type, tuple,
+          builder.getI32IntegerAttr(index));
+    return;
+  }
+
+  build(builder, result, tuple.getType(), tuple,
+        builder.getI32IntegerAttr(index));
+}
+
+//===----------------------------------------------------------------------===//
+// TupleOp
+//===----------------------------------------------------------------------===//
+
+void TupleOp::build(OpBuilder& builder, OperationState& result,
+                    ValueRange values) {
+  SmallVector<Type, 4> types;
+  types.reserve(values.size());
+  for (auto val : values) {
+    types.push_back(val.getType());
+  }
+
+  build(builder, result, builder.getTupleType(types), values);
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryEinsumOp
+//===----------------------------------------------------------------------===//
+
+void UnaryEinsumOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<UnaryEinsumToEinsum>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// CompareOp
+//===----------------------------------------------------------------------===//
+
+void CompareOp::build(OpBuilder& builder, OperationState& result, Value lhs,
+                      Value rhs, StringAttr comparison_direction) {
+  auto new_type =
+      UpdateResultElementType(&builder, lhs.getType(), builder.getI1Type());
+  build(builder, result, new_type, lhs, rhs, comparison_direction);
+}
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc"
+
+//===----------------------------------------------------------------------===//
+// xla_hlo Dialect Interfaces
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct HLOInlinerInterface : public DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+  // We don't have any special restrictions on what can be inlined into
+  // destination regions (e.g. while/conditional bodies). Always allow it.
+  bool isLegalToInline(Region* dest, Region* src,
+                       BlockAndValueMapping& valueMapping) const final {
+    return true;
+  }
+  // Operations in xla_hlo dialect are always legal to inline since they are
+  // pure.
+  bool isLegalToInline(Operation*, Region*, BlockAndValueMapping&) const final {
+    return true;
+  }
+};
+}  // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// xla_hlo Dialect Constructor
+//===----------------------------------------------------------------------===//
+
+XlaHloDialect::XlaHloDialect(MLIRContext* context)
+    : Dialect(getDialectNamespace(), context) {
+  addOperations<
+#define GET_OP_LIST
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc"
+      >();
+  addInterfaces<HLOInlinerInterface>();
+  addTypes<TokenType>();
+  // Support unknown operations because not all XLA operations are registered.
+  // allowUnknownOperations();
+}
+
+Type XlaHloDialect::parseType(DialectAsmParser& parser) const {
+  StringRef data_type;
+  if (parser.parseKeyword(&data_type)) return Type();
+
+  if (data_type == "token") return TokenType::get(getContext());
+  parser.emitError(parser.getNameLoc())
+      << "unknown xla_hlo type: " << data_type;
+  return nullptr;
+}
+
+void XlaHloDialect::printType(Type type, DialectAsmPrinter& os) const {
+  if (type.isa<TokenType>()) {
+    os << "token";
+    return;
+  }
+  os << "<unknown xla_hlo type>";
+}
+
+//===----------------------------------------------------------------------===//
+// Shape inference
+//===----------------------------------------------------------------------===//
+
+LogicalResult deriveShapeFromFirstOperand(
+    OpBuilder* builder, Operation* op,
+    SmallVectorImpl<Value>* reifiedReturnShapes) {
+  Value operand = op->getOperand(0);
+  ShapedType operand_type = operand.getType().dyn_cast<ShapedType>();
+  if (!operand_type) {
+    op->emitOpError() << "first operand is not a shaped type";
+    return failure();
+  }
+  auto loc = op->getLoc();
+  SmallVector<Value, 4> shape_values;
+  shape_values.reserve(operand_type.getRank());
+  auto shape_scalar_type = builder->getIntegerType(64);
+  for (auto element : llvm::enumerate(operand_type.getShape())) {
+    if (element.value() == ShapedType::kDynamicSize) {
+      Value dim = builder->create<DimOp>(loc, operand, element.index());
+      shape_values.push_back(
+          builder->create<IndexCastOp>(loc, dim, shape_scalar_type));
+    } else {
+      shape_values.push_back(builder->create<ConstantOp>(
+          loc, builder->getI64IntegerAttr(element.value())));
+    }
+  }
+  *reifiedReturnShapes = SmallVector<Value, 1>{
+      builder->create<TensorFromElementsOp>(loc, shape_values)};
+  return success();
+}
+
+}  // namespace xla_hlo
+}  // namespace mlir
diff --git a/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc b/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc
new file mode 100644
index 0000000..23712d1
--- /dev/null
+++ b/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc
@@ -0,0 +1,22 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h"
+
+namespace mlir {
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cc.inc"
+
+}  // namespace mlir
diff --git a/lib/Dialect/mhlo/IR/lhlo_ops.cc b/lib/Dialect/mhlo/IR/lhlo_ops.cc
new file mode 100644
index 0000000..3e374a4
--- /dev/null
+++ b/lib/Dialect/mhlo/IR/lhlo_ops.cc
@@ -0,0 +1,102 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines the operations used in the XLA dialect.
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/APFloat.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/APInt.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/ArrayRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/STLExtras.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/Support/FormatVariadic.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Dialect.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpDefinition.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpImplementation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OperationSupport.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/PatternMatch.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/TypeUtilities.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Types.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Value.h"
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc"
+
+namespace mlir {
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.cc.inc"
+namespace xla_lhlo {
+
+XlaLhloDialect::XlaLhloDialect(MLIRContext *context)
+    : Dialect(getDialectNamespace(), context) {
+  addOperations<
+#define GET_OP_LIST
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc"
+      >();
+}
+
+//===----------------------------------------------------------------------===//
+// StaticMemRefCastOp
+//===----------------------------------------------------------------------===//
+
+Value StaticMemRefCastOp::getViewSource() { return *getODSOperands(0).begin(); }
+
+static LogicalResult Verify(StaticMemRefCastOp op) {
+  if (!op.operand().getType().cast<ShapedType>().hasStaticShape())
+    return op.emitOpError("operand must have static shape");
+  if (!op.getType().hasStaticShape())
+    return op.emitOpError("result must have static shape");
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// DynamicMemRefCastOp
+//===----------------------------------------------------------------------===//
+
+Value DynamicMemRefCastOp::getViewSource() {
+  return *getODSOperands(0).begin();
+}
+
+static LogicalResult Verify(DynamicMemRefCastOp op) {
+  // Check if `sizes` and `strides` args are compatible with the result type.
+  if (op.sizes().size() != op.getType().getRank())
+    return op.emitOpError(
+        "`sizes` args count must be equal to the rank of the output memref");
+  return success();
+}
+
+#define GET_OP_CLASSES
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc"
+
+// TODO(cheshire): Support folding, reuse code from hlo_ops.cc.
+
+void FusionOp::build(OpBuilder &builder, OperationState &result,
+                     ArrayRef<NamedAttribute> attributes) {
+  result.addAttributes(attributes);
+  Region *bodyRegion = result.addRegion();
+  FusionOp::ensureTerminator(*bodyRegion, builder, result.location);
+}
+
+}  // namespace xla_lhlo
+}  // namespace mlir
diff --git a/lib/Dialect/mhlo/transforms/canonicalize.td b/lib/Dialect/mhlo/transforms/canonicalize.td
new file mode 100644
index 0000000..a6435bc
--- /dev/null
+++ b/lib/Dialect/mhlo/transforms/canonicalize.td
@@ -0,0 +1,30 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the canonicalize pattern definition file.
+
+include "third_party/llvm/llvm-project/mlir/include/mlir/IR/OpBase.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.td"
+include "mlir-hlo/Dialect/mhlo/IR/hlo_utils.td"
+
+def UnaryToBinaryEinsumEq : NativeCodeCall<
+  "$_builder.getStringAttr(\",\" + $0.getValue().str())">;
+
+// Convert UnaryEinsumOp to EinsumOp with two operands with redundant first
+// operand.
+def UnaryEinsumToEinsum : Pat<
+  (HLO_UnaryEinsumOp $operand, $equation),
+  (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)),
+                $operand, (UnaryToBinaryEinsumEq $equation))>;
diff --git a/lib/utils/broadcast_utils.cc b/lib/utils/broadcast_utils.cc
new file mode 100644
index 0000000..1c1499a
--- /dev/null
+++ b/lib/utils/broadcast_utils.cc
@@ -0,0 +1,74 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h"
+
+#include <algorithm>
+
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/Sequence.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/Shape/IR/Shape.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Diagnostics.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+
+namespace mlir {
+namespace xla {
+
+bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs,
+                                 DenseIntElementsAttr broadcast_dims) {
+  RankedTensorType lhs_type = lhs.getType().dyn_cast<RankedTensorType>();
+  RankedTensorType rhs_type = rhs.getType().dyn_cast<RankedTensorType>();
+  if (!lhs_type || !rhs_type) return false;
+  if (lhs_type.getRank() == rhs_type.getRank()) return true;
+
+  // Otherwise, verify that broadcast_dims strictly performs left-padding.
+  auto smaller_rank = std::min(lhs_type.getRank(), rhs_type.getRank());
+  auto larger_rank = std::max(lhs_type.getRank(), rhs_type.getRank());
+
+  if (smaller_rank != broadcast_dims.getNumElements()) {
+    return false;
+  }
+  auto expected_extents =
+      llvm::seq<int64_t>(larger_rank - smaller_rank, larger_rank);
+  return std::equal(expected_extents.begin(), expected_extents.end(),
+                    broadcast_dims.getIntValues().begin());
+}
+
+Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs,
+                                                        Value rhs,
+                                                        OpBuilder& builder) {
+  auto lhs_type = lhs.getType().dyn_cast<RankedTensorType>();
+  auto rhs_type = rhs.getType().dyn_cast<RankedTensorType>();
+  if (!lhs_type || !rhs_type) {
+    emitError(loc) << "shape computation for broadcasting elementwise ops "
+                   << "is only implemented for ranked tensors";
+    return nullptr;
+  }
+
+  int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank());
+  auto shape_type = shape::ShapeType::get(builder.getContext());
+  Value lhs_shape_v =
+      builder.createOrFold<shape::ShapeOfOp>(loc, shape_type, lhs);
+  Value rhs_shape_v =
+      builder.createOrFold<shape::ShapeOfOp>(loc, shape_type, rhs);
+  Value result_shape_v = builder.createOrFold<shape::BroadcastOp>(
+      loc, shape_type, lhs_shape_v, rhs_shape_v, nullptr /* error */);
+  return builder.createOrFold<shape::ToExtentTensorOp>(
+      loc, RankedTensorType::get({result_rank}, builder.getIndexType()),
+      result_shape_v);
+}
+
+}  // namespace xla
+}  // namespace mlir
diff --git a/lib/utils/convert_op_folder.cc b/lib/utils/convert_op_folder.cc
new file mode 100644
index 0000000..cf6b56f
--- /dev/null
+++ b/lib/utils/convert_op_folder.cc
@@ -0,0 +1,86 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines helpers useful when creating or manipulating lhlo/hlo.
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h"
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/TypeUtilities.h"
+
+namespace mlir {
+namespace xla {
+
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type) {
+  auto old_type = getElementTypeOrSelf(elements);
+  size_t bit_width = new_type.isBF16() ? 64 : new_type.getIntOrFloatBitWidth();
+
+  if (old_type.isa<mlir::FloatType>()) {
+    // mapValues always takes a function returning APInt, even when the output
+    // is actually float.
+    using func_type = mlir::APInt(const llvm::APFloat&);
+    if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+      // Float -> Float
+      return elements.mapValues(
+          new_type, llvm::function_ref<func_type>(
+                        [&newFloatType](const llvm::APFloat& floatVal) {
+                          llvm::APFloat newDouble(
+                              mlir::FloatAttr::getValueAsDouble(floatVal));
+                          bool loses_info = false;
+                          newDouble.convert(newFloatType.getFloatSemantics(),
+                                            llvm::APFloat::rmNearestTiesToEven,
+                                            &loses_info);
+                          return newDouble.bitcastToAPInt();
+                        }));
+    }
+    // Float -> Int
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>(
+                      [&bit_width](const llvm::APFloat& floatVal) {
+                        return llvm::APInt(
+                            bit_width,
+                            mlir::FloatAttr::getValueAsDouble(floatVal));
+                      }));
+  }
+
+  // old_type is Integer
+  // mapValues always takes a function returning APInt, even when the output
+  // is actually float.
+  using func_type = llvm::APInt(const llvm::APInt&);
+  if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+    // Int -> Float
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>([&newFloatType](
+                                                    const llvm::APInt& intVal) {
+          llvm::APFloat newDouble(static_cast<double>(intVal.getSExtValue()));
+          bool loses_info = false;
+          newDouble.convert(newFloatType.getFloatSemantics(),
+                            llvm::APFloat::rmNearestTiesToEven, &loses_info);
+          return newDouble.bitcastToAPInt();
+        }));
+  }
+  // new_type is Integer
+  // Int -> Int
+  return elements.mapValues(
+      new_type,
+      llvm::function_ref<func_type>([&bit_width](const llvm::APInt& intVal) {
+        return llvm::APInt(bit_width, intVal.getSExtValue());
+      }));
+}
+
+}  // namespace xla
+}  // namespace mlir
diff --git a/lib/utils/hlo_utils.cc b/lib/utils/hlo_utils.cc
new file mode 100644
index 0000000..75c01a9
--- /dev/null
+++ b/lib/utils/hlo_utils.cc
@@ -0,0 +1,70 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h"
+
+#include <numeric>
+
+#include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h"
+
+namespace mlir {
+namespace xla {
+
+DenseIntElementsAttr getBroadcastDimensionsAttr(Builder *b, Value x, Value y,
+                                                bool allow_empty) {
+  TensorType xType = x.getType().dyn_cast<RankedTensorType>();
+  TensorType yType = y.getType().dyn_cast<RankedTensorType>();
+  if (!xType || !yType) return {};
+  if (allow_empty && xType == yType) return {};
+
+  // If the shapes have the same rank, then there is nothing to do.
+  auto xRank = xType.getRank(), yRank = yType.getRank();
+  if (allow_empty && xRank == yRank) return {};
+
+  // Otherwise if the ranks of the inputs don't match, TensorFlow automatically
+  // reshapes the smaller by padding with dimensions of size 1 as a prefix. In
+  // other words to pad a 5-vector to a 3-dimensional tensor it is reshaped to
+  // have shape [1,1,5]. XLA's automatic broadcast code is able to broadcast
+  // from lower to higher rank, but doesn't assume you want to pad as a prefix
+  // of the dimensions, and instead needs to be told which dimensions of the
+  // higher rank tensor to match to the lower rank tensor.
+  auto maxRank = std::max(xRank, yRank);
+  auto minRank = std::min(xRank, yRank);
+
+  // Match the lower rank tensor along the larger-numbered dimensions of the
+  // higher rank tensor.
+  SmallVector<int64_t, 4> broadcastDimensions(minRank);
+  std::iota(broadcastDimensions.begin(), broadcastDimensions.end(),
+            maxRank - minRank);
+
+  RankedTensorType type =
+      RankedTensorType::get({minRank}, b->getIntegerType(64));
+  return DenseIntElementsAttr::get(type, broadcastDimensions);
+}
+
+DenseElementsAttr GetScalarOfType(Type ty, int64_t raw_value) {
+  RankedTensorType scalar_ty = RankedTensorType::get({}, ty);
+
+  if (auto float_ty = ty.dyn_cast<FloatType>()) {
+    APFloat value(float_ty.getFloatSemantics(), raw_value);
+    return DenseElementsAttr::get(scalar_ty, value);
+  }
+  auto int_ty = ty.cast<IntegerType>();
+  APInt value(int_ty.getWidth(), static_cast<int64_t>(raw_value), true);
+  return DenseElementsAttr::get(scalar_ty, value);
+}
+
+}  // namespace xla
+}  // namespace mlir