[MLIR][HLO] Add `rank-specialization-to-scf` pass

Currently the lowering is only implemented for the unary case. The n-ary case will follow. PiperOrigin-RevId: 374162772
2021-05-17 03:55:32 -07:00 · 2021-05-17 03:55:32 -07:00 · ccd70d5717
parent 295ef229d6
commit ccd70d5717
5 changed files with 127 additions and 4 deletions
--- a/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td
+++ b/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td
@ -128,3 +128,8 @@ def RankSpecializationClusterPass
    : Pass<"mhlo-rank-specialization-cluster", "FuncOp"> {
  let constructor = "createRankSpecializationClusterPass()";
 }
 def RankSpecializationToSCFPass
    : Pass<"mhlo-rank-specialization-to-scf", "FuncOp"> {
  let constructor = "createRankSpecializationToSCFPass()";
 }
--- a/include/mlir-hlo/Dialect/mhlo/transforms/passes.h
+++ b/include/mlir-hlo/Dialect/mhlo/transforms/passes.h
@ -69,10 +69,12 @@ createLegalizeTrigonometricToApproximationPass();
 std::unique_ptr<FunctionPass> createMoveUpDynamicBroadcastsForFusionPass();
-/// Rank specialization passes.
+/// Rank specialization passes:
 ///   - Find compatible operations and group them together in one rank
-///     specialization region.
+///     specialization cluster.
 ///   - Lower rank specialization clusters to SCF and ranked operations.
 std::unique_ptr<FunctionPass> createRankSpecializationClusterPass();
 std::unique_ptr<FunctionPass> createRankSpecializationToSCFPass();
 std::unique_ptr<FunctionPass> createOptimizeMhloPass();
 std::unique_ptr<FunctionPass> createLowerComplexPass();
--- a/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h
+++ b/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h
@ -100,9 +100,11 @@ void PopulateMoveUpDynamicBroadcastsForFusionLegality(ConversionTarget *target);
 void PopulateMoveUpDynamicBroadcastsForFusionPatterns(
    MLIRContext *context, OwningRewritePatternList *patterns);
-/// Populate rank specialization clustering patterns.
+/// Populate rank specialization clustering and lowering patterns.
 void PopulateRankSpecializationClusterPatterns(
    MLIRContext *context, OwningRewritePatternList *patterns);
 void PopulateRankSpecializationToSCFPatterns(
    MLIRContext *context, OwningRewritePatternList *patterns);
 }  // namespace mhlo
--- a/lib/Dialect/mhlo/transforms/rank_specialization.cc
+++ b/lib/Dialect/mhlo/transforms/rank_specialization.cc
@ -21,6 +21,7 @@ limitations under the License.
 #include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
 #include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@ -34,7 +35,7 @@ limitations under the License.
 namespace mlir {
-// Needed to build `llvm::SmallSet`s of `mlir::Value`s.
+/// Needed to build `llvm::SmallSet`s of `mlir::Value`s.
 static bool operator<(const Value &lhs, const Value &rhs) {
  return lhs.getAsOpaquePointer() < rhs.getAsOpaquePointer();
 }
@ -164,6 +165,97 @@ struct RankSpecializationClusterPass
  }
 };
 /// Lower rank specialization cluster to SCF.
 Type DeriveRankedTensorTypes(Type ty, int64_t rank) {
  auto unranked_ty = ty.dyn_cast<UnrankedTensorType>();
  if (!unranked_ty) return ty;
  SmallVector<int64_t, 8> shape(rank, ShapedType::kDynamicSize);
  return RankedTensorType::get(shape, unranked_ty.getElementType());
 }
 /// Unary element-wise operations on unranked tensors can be applied to the
 /// flattened tensor and reshaped to the expected shape afterwards.
 struct LowerUnaryRankSpecializationClusterPattern
    : public OpRewritePattern<chlo::RankSpecializationClusterOp> {
  using OpRewritePattern<chlo::RankSpecializationClusterOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(chlo::RankSpecializationClusterOp op,
                                PatternRewriter &rewriter) const override {
    // Only apply this to unary operations.
    if (op.operands().size() != 1) return failure();
    // Compute flattened operand shape.
    Location loc = op.getLoc();
    Value arg = op.operands().front();
    Value shape = rewriter.create<shape::ShapeOfOp>(loc, arg);
    Value flat_shape = rewriter.create<tensor::FromElementsOp>(
        loc,
        rewriter
            .create<shape::NumElementsOp>(loc, rewriter.getIndexType(), shape)
            .result());
    // Flatten operand.
    Value flat_arg = rewriter.create<mhlo::DynamicReshapeOp>(
        loc, DeriveRankedTensorTypes(arg.getType(), /*rank=*/1), arg,
        flat_shape);
    // Materialize ranked versions of the element-wise operations.
    BlockAndValueMapping bvm;
    bvm.map(op.getBody()->getArguments().front(), flat_arg);
    for (Operation &nested_op : op.getBody()->without_terminator()) {
      auto mapped_operands = llvm::to_vector<4>(llvm::map_range(
          nested_op.getOperands(), [&](Value v) { return bvm.lookup(v); }));
      auto ranked_result_types = llvm::to_vector<2>(llvm::map_range(
          nested_op.getResultTypes(),
          [](Type ty) { return DeriveRankedTensorTypes(ty, /*rank=*/1); }));
      OperationState ranked_op_state(loc, nested_op.getName().getStringRef(),
                                     mapped_operands, ranked_result_types,
                                     nested_op.getAttrs());
      Operation *ranked_op = rewriter.createOperation(ranked_op_state);
      for (auto it :
           llvm::zip(nested_op.getResults(), ranked_op->getResults())) {
        bvm.map(std::get<0>(it), std::get<1>(it));
      }
    }
    // Collect results and restore their shape. We don't have to reify a shape
    // computation in the unary case as the operand shapes to all the
    // element-wise ops can only be the unique input shape.
    SmallVector<Value> results;
    for (Value v : llvm::cast<chlo::RankSpecializationClusterYieldOp>(
                       op.getBody()->getTerminator())
                       .results()) {
      Value flat_result = bvm.lookup(v);
      Value result = rewriter.create<mhlo::DynamicReshapeOp>(
          loc, v.getType(), flat_result, shape);
      results.push_back(result);
    }
    // Replace the rank specialization cluster.
    rewriter.replaceOp(op, results);
    return success();
  }
 };
 struct RankSpecializationToSCFPass
    : public PassWrapper<RankSpecializationToSCFPass, FunctionPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<mhlo::MhloDialect, chlo::HloClientDialect,
                    shape::ShapeDialect>();
  }
  void runOnFunction() override {
    MLIRContext *ctx = &getContext();
    RewritePatternSet patterns(ctx);
    PopulateRankSpecializationToSCFPatterns(ctx, &patterns);
    if (failed(
            applyPatternsAndFoldGreedily(getFunction(), std::move(patterns)))) {
      return signalPassFailure();
    }
  }
 };
 }  // namespace
 void PopulateRankSpecializationClusterPatterns(
@ -171,9 +263,18 @@ void PopulateRankSpecializationClusterPatterns(
  patterns->insert<RankSpecializationClusterPattern>(context);
 }
 void PopulateRankSpecializationToSCFPatterns(
    MLIRContext *context, OwningRewritePatternList *patterns) {
  patterns->insert<LowerUnaryRankSpecializationClusterPattern>(context);
 }
 std::unique_ptr<FunctionPass> createRankSpecializationClusterPass() {
  return std::make_unique<RankSpecializationClusterPass>();
 }
 std::unique_ptr<FunctionPass> createRankSpecializationToSCFPass() {
  return std::make_unique<RankSpecializationToSCFPass>();
 }
 }  // namespace mhlo
 }  // namespace mlir
--- a/tests/rank-specialization.mlir
+++ b/tests/rank-specialization.mlir
@ -1,4 +1,5 @@
 // RUN: mlir-hlo-opt %s --split-input-file --mhlo-rank-specialization-cluster | FileCheck %s
 // RUN: mlir-hlo-opt %s --split-input-file --mhlo-rank-specialization-cluster --mhlo-rank-specialization-to-scf | FileCheck %s --check-prefix CHECK-SCF
 // CHECK-LABEL: @add_mul
 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<*xf32>, %[[ARG1:.*]]: tensor<*xf32>, %[[ARG2:.*]]: tensor<*xf32>)
@ -37,6 +38,18 @@ func @sqrt(%arg : tensor<*xf32>) -> tensor<*xf32> {
  return %2 : tensor<*xf32>
 }
 // CHECK-SCF-LABEL: @sqrt
 // CHECK-SCF-SAME:  (%[[ARG:.*]]: tensor<*xf32>)
 // CHECK-SCF:       %[[SHAPE:.*]] = shape.shape_of %[[ARG]]
 // CHECK-SCF:       %[[N:.*]] = shape.num_elements %[[SHAPE]]
 // CHECK-SCF:       %[[FLAT_SHAPE:.*]] = tensor.from_elements %[[N]]
 // CHECK-SCF:       %[[FLAT_ARG:.*]] = "mhlo.dynamic_reshape"(%[[ARG]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor<?xf32>
 // CHECK-SCF:       %[[TMP0:.*]] = "mhlo.sqrt"(%[[FLAT_ARG]]) : (tensor<?xf32>)
 // CHECK-SCF:       %[[TMP1:.*]] = "mhlo.sqrt"(%[[TMP0]]) : (tensor<?xf32>)
 // CHECK-SCF:       %[[TMP2:.*]] = "mhlo.sqrt"(%[[TMP1]]) : (tensor<?xf32>)
 // CHECK-SCF:       %[[RES:.*]] = "mhlo.dynamic_reshape"(%[[TMP2]], %[[SHAPE]]) : (tensor<?xf32>, tensor<?xindex>) -> tensor<*xf32>
 // CHECK-SCF:       return %[[RES]]
 // -----
 // Don't cluster single ranked operation.