/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ // This file implements logic for lowering HLO dialect to LHLO dialect. #include "third_party/absl/memory/memory.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/AffineMap.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Attributes.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/BlockAndValueMapping.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Builders.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Function.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Location.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/MLIRContext.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/Operation.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/PatternMatch.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/IR/StandardTypes.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/Pass/Pass.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/Transforms/BufferPlacement.h" #include "third_party/llvm/llvm-project/mlir/include/mlir/Transforms/DialectConversion.h" #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h" #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" #include "third_party/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" namespace mlir { namespace mhlo { namespace { template using BaseOpConversion = BufferAssignmentOpConversionPattern; using StdReturnOpConverter = detail::BufferAssignmentReturnOpConverter; Value InsertDynamicAllocAndDealloc(Location loc, Value result, Value shape_operand, ConversionPatternRewriter* rewriter) { auto result_type = result.getType().dyn_cast(); if (!result_type) { result.getDefiningOp()->emitOpError() << "tensor to buffer conversion expects ranked results"; } auto memref_type = MemRefType::get(result_type.getShape(), result_type.getElementType()); Operation* op = result.getDefiningOp(); // Extract the required element out of the vector. SmallVector dynamic_operands; for (auto shape_element : llvm::enumerate(result_type.getShape())) { if (shape_element.value() != ShapedType::kDynamicSize) continue; Value index = rewriter->create( loc, rewriter->getIntegerAttr(rewriter->getIndexType(), shape_element.index())); Value alloc_operand = rewriter->create(loc, shape_operand, ValueRange{index}); if (!alloc_operand.getType().isIndex()) { alloc_operand = rewriter->create(loc, alloc_operand, rewriter->getIndexType()); } dynamic_operands.push_back(alloc_operand); } // Insert in front of op to ensure sizes are available. OpBuilder allocBuilder(op); auto alloc = allocBuilder.create(loc, memref_type, dynamic_operands); return alloc; } Value InsertAlloc(Location loc, OpResult result, BufferAssignmentPlacer* bufferAssignment, ConversionPatternRewriter* rewriter) { auto result_type = result.getType().dyn_cast(); if (!result_type || !result_type.hasStaticShape()) { result.getDefiningOp()->emitOpError() << "tensor to buffer conversion expects statically shaped results"; } auto memref_type = MemRefType::get(result_type.getShape(), result_type.getElementType()); OpBuilder::InsertionGuard guard(*rewriter); rewriter->restoreInsertionPoint( bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter->create(loc, memref_type); return alloc; } template class HloToLhloOpConverter : public BaseOpConversion { public: using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( HloOpTy hloOp, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { Operation* op = hloOp.getOperation(); const auto& original_results = op->getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : llvm::enumerate(original_results)) { RankedTensorType resultType = result.value().getType().dyn_cast(); if (!resultType) { return failure(); } if (resultType.hasStaticShape()) { buffer_args.push_back(InsertAlloc(op->getLoc(), result.value(), this->bufferAssignment, &rewriter)); } else { SmallVector results_shape; auto shape_type_op = dyn_cast(op); if (!shape_type_op) return failure(); if (failed( shape_type_op.reifyReturnTypeShapes(rewriter, results_shape))) return failure(); buffer_args.push_back(InsertDynamicAllocAndDealloc( op->getLoc(), result.value(), results_shape.front(), &rewriter)); } } rewriter.create>(op->getLoc(), llvm::None, buffer_args, op->getAttrs()); rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size())); return success(); } }; struct HloToLhloDynamicBroadcastInDimOpConverter : public BaseOpConversion { public: using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( mhlo::DynamicBroadcastInDimOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { auto loc = op.getLoc(); Value resultBuffer = InsertDynamicAllocAndDealloc( loc, op.getResult(), op.output_dimensions(), &rewriter); Value transformed_operand = InsertDynamicMemrefCastOp(op, operands.front(), &rewriter); rewriter.create( loc, transformed_operand, resultBuffer, op.broadcast_dimensions()); rewriter.replaceOp(op, {resultBuffer}); return success(); } private: // Inserts dynamic memref to change the layout of the memref to put 0-stride // and size of the target dimension if size-1 dimension expansion is // necessary. xla_lhlo::DynamicMemRefCastOp InsertDynamicMemrefCastOp( mhlo::DynamicBroadcastInDimOp op, Value operand, OpBuilder* b) const { auto loc = op.getLoc(); auto operand_type = operand.getType().cast(); auto operand_shape = operand_type.getShape(); SmallVector sizes, strides; sizes.reserve(operand_shape.size()); strides.reserve(operand_shape.size()); Value zero = b->create(loc, 0); Value one = b->create(loc, 1); for (auto dim : llvm::enumerate(op.broadcast_dimensions())) { Value broadcast_dim_value = b->create(loc, dim.value().getSExtValue()); Value result_dim_size = b->create( loc, op.output_dimensions(), broadcast_dim_value); Value operand_dim_size = ShapedType::isDynamic(operand_shape[dim.index()]) ? b->create(loc, operand, dim.index()).getResult() : b->create(loc, operand_shape[dim.index()]) .getResult(); // TODO(pifon): Revisit if this cast is needed. Maybe we can use // tensor for `output_dimensions` as well. if (!result_dim_size.getType().isIndex()) { result_dim_size = b->create(loc, result_dim_size, b->getIndexType()); } // There can be two cases: // 1) Operand dim == result dim => expansion is not needed => stride := 1. // 2) Operand dim < result dim => expansion is needed => stride := 0. Value is_expansion = b->create(loc, CmpIPredicate::slt, operand_dim_size, result_dim_size); strides.push_back( b->create(loc, is_expansion, zero, one)); // Size of input dim can be set to the size of the corresponding output // dimension for both cases. sizes.push_back(result_dim_size); } // Type-erased memref type with static rank, dynamic sizes and strides. SmallVector dynamic_layout(operand_shape.size(), MemRefType::kDynamicStrideOrOffset); SmallVector dynamic_shape(operand_shape.size(), MemRefType::kDynamicSize); auto type_erased_memref_type = MemRefType::get( dynamic_shape, operand_type.getElementType(), makeStridedLinearLayoutMap(dynamic_layout, /*offset=*/0, b->getContext())); auto transformed_operand = b->create( loc, type_erased_memref_type, operand, sizes, strides); return transformed_operand; } }; struct HloToLhloReduceOpConverter : public BaseOpConversion { public: using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( mhlo::ReduceOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { auto loc = op.getLoc(); // TODO(b/137624192) Implement variadic reduce. if (op.getNumResults() != 1) return failure(); if (!llvm::hasSingleElement(op.body())) { return op.emitOpError() << "tensor to buffer conversion expects a single block " "in the region containing the operation"; } const auto& original_results = op.getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : original_results) { buffer_args.push_back( InsertAlloc(loc, result, this->bufferAssignment, &rewriter)); } auto new_op = rewriter.create( loc, llvm::None, buffer_args, op.getAttrs()); // Copy over the operations inside the region. rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end()); // Create new block arguments with correct type. auto& entry_block = new_op.body().front(); int original_arg_count = entry_block.getNumArguments(); for (int i = 0; i < original_arg_count; ++i) { auto old_arg = entry_block.getArgument(i); auto old_type = old_arg.getType().cast(); auto new_type = MemRefType::get(old_type.getShape(), old_type.getElementType()); auto new_arg = entry_block.addArgument(new_type); rewriter.replaceUsesOfBlockArgument(old_arg, new_arg); } // Add an argument for the result. entry_block.addArgument( entry_block.getArgument(original_arg_count).getType()); // Remove the old arguments. for (int i = original_arg_count - 1; i >= 0; --i) { entry_block.eraseArgument(i); } // Insert terminator at the end. rewriter.setInsertionPointToEnd(&entry_block); rewriter.create(loc); rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size())); return success(); } }; class HloToLhloTensorLoadOpConverter : public BaseOpConversion { public: using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( mlir::TensorLoadOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOp(op, operands); return success(); } }; // TODO(b/137624192): Rewrite into a copy and elide copy if possible. class HloToLhloTensorStoreOpConverter : public BaseOpConversion { public: using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( mlir::TensorStoreOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOpWithNewOp( op, llvm::None, operands.front(), operands.back()); return success(); } }; // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary // buffers if necessary. // // Example fusion with HLO ops. // // func @fusion(%arg0: memref<2x2xf32>, // %arg1: memref<2x2xf32>, // %arg2: memref<2x2xf32>, // %arg3: memref<2x2xf32>) { // "xla_lhlo.fusion"() ({ // %0 = tensor_load %arg1 : memref<2x2xf32> // %1 = tensor_load %arg2 : memref<2x2xf32> // %2 = "mhlo.add"(%0, %1) : // (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // %3 = tensor_load %arg0 : memref<2x2xf32> // %4 = "mhlo.multiply"(%2, %3) : // (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // tensor_store %4, %arg3 : memref<2x2xf32> // "xla_lhlo.terminator"() : () -> () // }) : () -> () // return // } // // Transformed fusion with LHLO ops. // func @fusion(%arg0: memref<2x2xf32>, // %arg1: memref<2x2xf32>, // %arg2: memref<2x2xf32>, // %arg3: memref<2x2xf32>) { // "xla_lhlo.fusion"() ( { // %0 = alloc() : memref<2x2xf32> // "xla_lhlo.add"(%arg1, %arg2, %0) : // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () // "xla_lhlo.multiply"(%0, %arg0, %arg3) : // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () // "xla_lhlo.terminator"() : () -> () // }) : () -> () // return // } // // FuncOp signature conversion example: // // func @func_op(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { // %0 = "mhlo.maximum"(%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> // tensor<4xf32> %1 = "mhlo.add"(%arg0, %0) : (tensor<4xf32>, // tensor<4xf32>) -> tensor<4xf32> return %1 : tensor<4xf32> // } // // Transformed function with an extra argument for the result. The types have // been converted from tensor to memref. // // func @func_op(%arg0: memref<4xf32>, // %arg1: memref<4xf32>, // %arg2: memref<4xf32>) { // %0 = alloc() : memref<4xf32> // "xla_lhlo.maximum"(%arg0, %arg1, %0) : // (memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () // %1 = alloc() : memref<4xf32> // "xla_lhlo.add"(%arg0, %0, %1) : // (memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () // "xla_lhlo.copy"(%1, %arg2) : (memref<4xf32>, memref<4xf32>) -> () // "xla_lhlo.terminator"() : () -> () // } struct HloLegalizeToLhlo : public PassWrapper> { public: HloLegalizeToLhlo() = default; HloLegalizeToLhlo(const HloLegalizeToLhlo& o) { this->results_escape_function = o.results_escape_function.getValue(); } explicit HloLegalizeToLhlo(bool results_escape_function) { this->results_escape_function.setValue(results_escape_function); } void runOnOperation() override { OwningRewritePatternList patterns; auto& context = getContext(); ConversionTarget target(context); target.addLegalDialect(); target.addLegalDialect(); target.addLegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addLegalOp(); target.addLegalOp(); target.addIllegalDialect(); BufferAssignmentTypeConverter converter; target.addDynamicallyLegalOp([&](FuncOp op) { auto inputs = op.getType().getInputs(); return llvm::all_of(inputs, [](Type input) { return input.isa(); }) && converter.isLegal(&op.getBody()); }); target.addDynamicallyLegalOp([&](mlir::ReturnOp returnOp) { return std::all_of(returnOp.operand_type_begin(), returnOp.operand_type_end(), [](Type type) { return type.isa(); }); }); auto module = getOperation(); WalkResult result = module.walk([&](FuncOp func) -> WalkResult { BufferAssignmentPlacer bufferAssignment(func); OwningRewritePatternList patterns; populateHLOToLHLOConversionPattern(func.getContext(), &bufferAssignment, &converter, &patterns); if (results_escape_function) { populateWithBufferAssignmentOpConversionPatterns< mlir::ReturnOp, mlir::ReturnOp, xla_lhlo::CopyOp, /*allowMemrefFunctionResults=*/true>(&context, &bufferAssignment, &converter, &patterns); } else { populateWithBufferAssignmentOpConversionPatterns< mlir::ReturnOp, mlir::ReturnOp, xla_lhlo::CopyOp, /*allowMemrefFunctionResults=*/false>(&context, &bufferAssignment, &converter, &patterns); } return applyPartialConversion(func, target, patterns); }); if (result.wasInterrupted()) { signalPassFailure(); } } private: Option results_escape_function{ *this, "results-escape-function", llvm::cl::desc( "Allocate the results of functions within the functions body"), llvm::cl::init(false)}; }; } // namespace void populateHLOToLHLOConversionPattern( MLIRContext* context, BufferAssignmentPlacer* bufferAssignment, TypeConverter* converter, OwningRewritePatternList* patterns) { // clang-format off patterns->insert< HloToLhloDynamicBroadcastInDimOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloReduceOpConverter, HloToLhloTensorLoadOpConverter, HloToLhloTensorStoreOpConverter >(context, bufferAssignment, converter); // clang-format on } std::unique_ptr> createLegalizeToLhloPass( bool results_escape_function) { return absl::make_unique(results_escape_function); } static PassRegistration legalize_pass( "hlo-legalize-to-lhlo", "Legalize from HLO dialect to LHLO dialect"); } // namespace mhlo } // namespace mlir