[mhlo] Add legalize to SCF pass

Start of a pass to legalize MHLO control flow to SCF, so that further optimization can happen in a common form. The current version only matches a very simple instance (which also happens to occur a few times). It exposes some further canonicalization opportunities that aren't yet addressed.

PiperOrigin-RevId: 329017723
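
Besides the --mhlo-control-flow-to-scf flag exercised in the new test, the pass can be scheduled programmatically through createControlFlowToScfPass(). A minimal sketch of doing so (illustrative only; the helper name and the exact includes are assumptions, not part of this change):

#include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
#include "mlir/IR/Function.h"
#include "mlir/Pass/PassManager.h"

// Illustrative helper: runs the new conversion on every FuncOp in the module.
void addMhloControlFlowToScf(mlir::PassManager& pm) {
  pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createControlFlowToScfPass());
}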
Jacques Pienaar 2020-08-28 15:10:56 -07:00 committed by TensorFlow MLIR Team
parent 7176fb1839
commit 344c500fca
5 changed files with 246 additions and 0 deletions

@@ -30,6 +30,11 @@ def LegalizeControlFlowPass : Pass<"mhlo-legalize-control-flow", "FuncOp"> {
let constructor = "createLegalizeControlFlowPass()";
}
def LegalizeControlFlowToScfPass : Pass<"mhlo-control-flow-to-scf", "FuncOp"> {
let summary = "Legalize from MHLO control flow to SCF control flow.";
let constructor = "createControlFlowToScfPass()";
}
def LegalizeGatherToTorchIndexSelectPass : Pass<"mhlo-legalize-gather-to-torch-index-select", "FuncOp"> {
let summary = "Legalizes gathers to a torch index select.";
let constructor = "createLegalizeGatherToTorchIndexSelectPass()";

@@ -35,6 +35,9 @@ namespace mhlo {
/// Lowers HLO control flow ops to the Standard dialect.
std::unique_ptr<OperationPass<FuncOp>> createLegalizeControlFlowPass();
/// Lowers MHLO control flow ops to the SCF dialect.
std::unique_ptr<OperationPass<FuncOp>> createControlFlowToScfPass();
/// Lowers from HLO dialect to Standard dialect.
std::unique_ptr<OperationPass<FuncOp>> createLegalizeToStdPass();

@@ -93,6 +93,7 @@ add_mlir_library(MhloToLhloConversion
add_mlir_library(MhloToStandard
legalize_control_flow.cc
legalize_to_standard.cc
mhlo_control_flow_to_scf.cc
DEPENDS
MLIRhlo_opsIncGen

@@ -0,0 +1,199 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "llvm/Support/Casting.h"
#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
#include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#define DEBUG_TYPE "mhlo-control-flow-to-scf"
namespace mlir {
namespace mhlo {
namespace {
/// Convert MHLO While to SCF.
void MatchAndRewrite(WhileOp whileOp);
/// Pass that converts MHLO control flow to SCF.
class ControlFlowToScfPass
: public mlir::PassWrapper<ControlFlowToScfPass, FunctionPass> {
void getDependentDialects(DialectRegistry& registry) const override {
registry.insert<scf::SCFDialect>();
}
void runOnFunction() override {
getFunction().walk([&](WhileOp whileOp) { MatchAndRewrite(whileOp); });
}
};
// TODO(jpienaar): Look into reformulating as a pattern.
void MatchAndRewrite(WhileOp whileOp) {
// Handle pattern:
// x = start
// step = ...
// limit = ...
// while (x < limit) { ... x += step; }
// Only tuple-carried (multi-value) while loops are handled at the moment.
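// Concretely: the operand of the while is an mhlo.tuple, the condition region
// ends in an mhlo.compare with comparison_direction = "LT" between the
// induction element and a limit element, and the body advances the induction
// element via mhlo.add with a constant step (see the lt_loop test added below).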
auto tupleOp = whileOp.getOperand().getDefiningOp<TupleOp>();
if (!tupleOp) return;
auto bodyReturn = whileOp.body()
.front()
.getTerminator()
->getOperand(0)
.getDefiningOp<mhlo::TupleOp>();
// Note: due to the shape restrictions on While, if the operand to While is a
// tuple, then so is the return type of the body. But the verifier isn't
// checking that at the moment, so just bail out here if this doesn't hold.
if (!bodyReturn) return;
Value result = whileOp.cond().front().getTerminator()->getOperand(0);
// TODO(jpienaar): Expand to handle more than simple case with LT compare and
// constant step.
auto cmp = result.getDefiningOp<mhlo::CompareOp>();
if (!cmp || cmp.comparison_direction() != "LT") return;
const int kConstant = -1;
auto getValueAndIndex = [&](Value val) -> std::pair<Value, int> {
if (matchPattern(val, m_Constant())) return {val, kConstant};
// If it is extracted from the tuple passed into the loop (a get_tuple_element
// of the block argument), then the corresponding external value is captured.
if (auto gte = val.getDefiningOp<GetTupleElementOp>()) {
if (!gte.getOperand().isa<mlir::BlockArgument>()) return {nullptr, 0};
int index = gte.index().getSExtValue();
return {tupleOp.getOperand(index), index};
}
return {nullptr, 0};
};
using ValueIndex = std::pair<Value, int>;
ValueIndex loopIndVar = getValueAndIndex(cmp.lhs());
ValueIndex max = getValueAndIndex(cmp.rhs());
if (!loopIndVar.first || !max.first) return;
auto add =
bodyReturn.getOperand(loopIndVar.second).getDefiningOp<mhlo::AddOp>();
if (!add) return;
ValueIndex step = getValueAndIndex(add.rhs());
if (step.second != kConstant || !step.first) return;
// For now, only handle the case where the tuple isn't propagated as-is.
// TODO(jpienaar): Remove this when a tuple is also created inside the loop
// to propagate.
for (auto* use : whileOp.body().front().getArgument(0).getUsers())
if (!isa<GetTupleElementOp>(use)) return;
LLVM_DEBUG(llvm::dbgs() << "Found for (" << whileOp.getLoc() << "):\n";
llvm::dbgs() << " loopIndVar = " << loopIndVar.second << " max = "
<< max.second << " step = " << step.second << "\n";
llvm::dbgs() << " loopIndVar = " << loopIndVar.first << " max = "
<< max.first << " step = " << step.first << "\n";);
OpBuilder b(whileOp);
// Inputs to new for loop.
llvm::SmallVector<Value, 4> input;
input.reserve(tupleOp.getNumOperands());
for (auto r : tupleOp.getOperands().take_front(loopIndVar.second))
input.push_back(r);
for (auto r : tupleOp.getOperands().drop_front(loopIndVar.second + 1))
input.push_back(r);
auto tensorIndexType = RankedTensorType::get({}, b.getIndexType());
auto getAsIndex = [&](Value val) {
auto loc = whileOp.getLoc();
return b.create<ExtractElementOp>(
loc, b.create<IndexCastOp>(loc, tensorIndexType, val), ValueRange());
};
// scf.for uses the index type for its bounds, so convert these.
auto forloopIndVar = getAsIndex(loopIndVar.first);
auto forMax = getAsIndex(max.first);
auto forStep = getAsIndex(step.first);
auto forOp = b.create<mlir::scf::ForOp>(whileOp.getLoc(), forloopIndVar,
forMax, forStep, input);
// Transfer the body without the block arguments.
forOp.getLoopBody().front().getOperations().splice(
forOp.getLoopBody().front().getOperations().end(),
whileOp.body().front().getOperations());
b.setInsertionPointToStart(&forOp.getLoopBody().front());
auto loopIndVarElType =
loopIndVar.first.getType().cast<ShapedType>().getElementType();
Value indVar = b.create<SplatOp>(
whileOp.getLoc(), RankedTensorType::get({}, loopIndVarElType),
b.create<IndexCastOp>(whileOp.getLoc(), loopIndVarElType,
forOp.getInductionVar()));
// Update all block argument users to the SCF For args.
for (auto* use :
llvm::make_early_inc_range(whileOp.body().getArgument(0).getUsers())) {
// TODO(jpienaar): Expand here too when we allow using the tuple in the
// loop.
auto gte = cast<GetTupleElementOp>(use);
// If this is the loop induction variable, replace uses with the splatted
// induction variable, as this operand is not carried as an iter operand.
if (gte.index() == loopIndVar.second) {
use->getResult(0).replaceAllUsesWith(indVar);
use->erase();
continue;
}
int index = gte.index().getSExtValue();
// If the element comes after the loop induction variable, decrement the index
// since the induction variable is not included in the for op's iter operands.
if (index > loopIndVar.second) --index;
use->getResult(0).replaceAllUsesWith(forOp.getIterOperands()[index]);
use->erase();
}
// Create new yield op without induction var update.
SmallVector<Value, 4> newYieldOps;
newYieldOps.reserve(bodyReturn.getNumOperands() - 1);
for (auto r : bodyReturn.getOperands().take_front(loopIndVar.second))
newYieldOps.push_back(r);
for (auto r : bodyReturn.getOperands().drop_front(loopIndVar.second + 1))
newYieldOps.push_back(r);
// Delete the return and the tuple op feeding it (the last two ops in the block).
forOp.getLoopBody().front().back().erase();
forOp.getLoopBody().front().back().erase();
b.setInsertionPointToEnd(&forOp.getLoopBody().front());
b.create<scf::YieldOp>(whileOp.getLoc(), newYieldOps);
// Recombine output tuple with max value of induction variable.
llvm::SmallVector<Value, 4> loopOut;
loopOut.reserve(forOp.getNumResults() + 1);
for (auto r : forOp.getResults().take_front(loopIndVar.second))
loopOut.push_back(r);
loopOut.push_back(max.first);
for (auto r : forOp.getResults().drop_front(loopIndVar.second))
loopOut.push_back(r);
b.setInsertionPoint(whileOp);
auto newRes = b.create<mhlo::TupleOp>(whileOp.getLoc(), loopOut);
whileOp.replaceAllUsesWith(newRes.getOperation());
whileOp.erase();
}
} // anonymous namespace
std::unique_ptr<OperationPass<FuncOp>> createControlFlowToScfPass() {
return std::make_unique<ControlFlowToScfPass>();
}
} // namespace mhlo
} // namespace mlir

@@ -0,0 +1,38 @@
// RUN: mlir-hlo-opt --mhlo-control-flow-to-scf %s | FileCheck %s
func @lt_loop(%arg0: tensor<4xf32>, %arg1: tensor<f32>, %arg2: tensor<f32>, %arg3: tensor<4xf32>, %arg4: tensor<f32>, %arg5: tensor<f32>, %arg6: tensor<f32>, %arg7: tensor<f32>, %arg8: tensor<i32>) -> (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) {
%cst = constant dense<-1> : tensor<i32>
%cst_0 = constant dense<1> : tensor<i32>
%cst_1 = constant dense<0> : tensor<i32>
%cst_2 = constant dense<1000> : tensor<i32>
%0 = "mhlo.tuple"(%cst_1, %cst, %cst_2) : (tensor<i32>, tensor<i32>, tensor<i32>) -> tuple<tensor<i32>, tensor<i32>, tensor<i32>>
%1 = "mhlo.while"(%0) ( {
^bb0(%arg9: tuple<tensor<i32>, tensor<i32>, tensor<i32>>): // no predecessors
%2 = "mhlo.get_tuple_element"(%arg9) {index = 0 : i32} : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tensor<i32>
%3 = "mhlo.get_tuple_element"(%arg9) {index = 2 : i32} : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tensor<i32>
%4 = "mhlo.compare"(%2, %3) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
"mhlo.return"(%4) : (tensor<i1>) -> ()
}, {
^bb0(%arg9: tuple<tensor<i32>, tensor<i32>, tensor<i32>>): // no predecessors
%2 = "mhlo.get_tuple_element"(%arg9) {index = 0 : i32} : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tensor<i32>
%3 = mhlo.add %2, %cst_0 : tensor<i32>
%4 = "mhlo.get_tuple_element"(%arg9) {index = 1 : i32} : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tensor<i32>
%5 = "mhlo.get_tuple_element"(%arg9) {index = 2 : i32} : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tensor<i32>
%6 = "mhlo.tuple"(%3, %4, %5) : (tensor<i32>, tensor<i32>, tensor<i32>) -> tuple<tensor<i32>, tensor<i32>, tensor<i32>>
"mhlo.return"(%6) : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> ()
}) : (tuple<tensor<i32>, tensor<i32>, tensor<i32>>) -> tuple<tensor<i32>, tensor<i32>, tensor<i32>>
return %1 : tuple<tensor<i32>, tensor<i32>, tensor<i32>>
}
// CHECK-LABEL: func @lt_loop(
// CHECK: %[[VAL_9:.*]] = constant dense<-1> : tensor<i32>
// CHECK: %[[VAL_10:.*]] = constant dense<1> : tensor<i32>
// CHECK: %[[VAL_11:.*]] = constant dense<0> : tensor<i32>
// CHECK: %[[VAL_12:.*]] = constant dense<1000> : tensor<i32>
// CHECK: %[[VAL_14:.*]] = index_cast %[[VAL_11]] : tensor<i32> to tensor<index>
// CHECK: %[[VAL_15:.*]] = extract_element %[[VAL_14]][] : tensor<index>
// CHECK: %[[VAL_16:.*]] = index_cast %[[VAL_12]] : tensor<i32> to tensor<index>
// CHECK: %[[VAL_17:.*]] = extract_element %[[VAL_16]][] : tensor<index>
// CHECK: %[[VAL_18:.*]] = index_cast %[[VAL_10]] : tensor<i32> to tensor<index>
// CHECK: %[[VAL_19:.*]] = extract_element %[[VAL_18]][] : tensor<index>
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_15]] to %[[VAL_17]] step %[[VAL_19]] iter_args(%[[VAL_22:.*]] = %[[VAL_9]], %[[VAL_23:.*]] = %[[VAL_12]])