Fix Cosh approximation for F16.
We should upcast F16 to F32 to prevent precision loss; e.g., cosh(-9) previously evaluated to 4042 instead of 4052. This also lets us enable the MLIR-generated kernel for the F16 type.

Also move the template instantiation for Sinh inside the #ifdef block; this was missed in a previous commit.

PiperOrigin-RevId: 378635042
parent 837a1de7c5
commit 6088eb697c
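The 4042-vs-4052 example is reproducible outside the compiler. The sketch below is an illustration, not the actual kernel: it evaluates the decomposition cosh(x) = e^(x + log(1/2)) + e^(-x + log(1/2)) used by this lowering (see the comment blocks in the diff below), once with every intermediate rounded to f16 and once in f32 with a single truncation at the end. It assumes a compiler/target with the _Float16 extension (e.g. recent Clang or GCC on x86-64).

#include <cmath>
#include <cstdio>

// Illustration only, assuming the _Float16 compiler extension: mimic an
// all-f16 evaluation by rounding every intermediate to half precision, and
// compare against computing in f32 and truncating only the final result.
int main() {
  const float x = -9.0f;

  // All-f16: log(1/2) rounds to -0.693359375, log(1/2) - x rounds to
  // 8.3046875, and exp(8.3046875) ~ 4042.8 rounds down to 4042.
  _Float16 log_half = (_Float16)std::log(0.5f);
  _Float16 a = (_Float16)(x + (float)log_half);
  _Float16 b = (_Float16)((float)log_half - x);
  _Float16 cosh_f16 =
      (_Float16)std::exp((float)a) + (_Float16)std::exp((float)b);

  // Upcast: compute in f32 (exp(8.3068528) ~ 4051.54) and truncate once at
  // the end; 4051.54 rounds to 4052 in f16.
  float log_half_f32 = std::log(0.5f);
  float cosh_f32 = std::exp(x + log_half_f32) + std::exp(log_half_f32 - x);

  std::printf("all-f16: %g\n", (float)cosh_f16);            // 4042
  std::printf("upcast:  %g\n", (float)(_Float16)cosh_f32);  // 4052
  return 0;
}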
@@ -648,6 +648,51 @@ Value MaterializeLgamma(ConversionPatternRewriter &rewriter, Location loc,
       lgamma);
 }
 
+
+// Express `cosh` as
+//   cosh(x) = (e^x + e^-x) / 2
+//           = e^(x + log(1/2)) + e^(-x + log(1/2))
+//
+// The second formulation avoids overflowing when e^x = inf but (e^x)/2 is not.
+//
+// This incorrectly overflows to inf for two f32 input values, namely
+// +/-89.4159851, due to rounding error when computing x +/- log(1/2). The
+// correct answer of 3.40281961e+38 (0x7f7fffec) is very close to max-float, so
+// we deem this acceptable.
+Value MaterializeCoshApproximation(ConversionPatternRewriter &rewriter,
+                                   Location loc, ValueRange operands) {
+  CoshOp::Adaptor transformed(operands);
+  Value x = transformed.operand();
+
+  Value log_one_half =
+      rewriter.create<mhlo::LogOp>(loc, getConstantLike(rewriter, loc, 0.5, x));
+  Value exp_add = rewriter.create<mhlo::ExpOp>(
+      loc, rewriter.create<mhlo::AddOp>(loc, x, log_one_half));
+  Value exp_sub = rewriter.create<mhlo::ExpOp>(
+      loc, rewriter.create<mhlo::SubOp>(loc, log_one_half, x));
+  return rewriter.create<mhlo::AddOp>(loc, exp_add, exp_sub);
+}
+
+struct ConvertCoshOp : public OpConversionPattern<CoshOp> {
+  using OpConversionPattern<CoshOp>::OpConversionPattern;
+  LogicalResult matchAndRewrite(
+      CoshOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    CoshOp::Adaptor transformed(operands);
+    Value x = transformed.operand();
+    if (x.getType().cast<ShapedType>().getElementType().isa<ComplexType>()) {
+      // TODO(hinsu): Support operands with complex element types by always
+      // using the formula for large x. The compare op is not legal for complex
+      // numbers.
+      return failure();
+    }
+    rewriter.replaceOp(op,
+                       MaterializeWithUpcast(rewriter, op.getLoc(), operands,
+                                             rewriter.getF32Type(),
+                                             &MaterializeCoshApproximation));
+    return success();
+  }
+};
+
 // Compute the Digamma function using Lanczos' approximation from "A Precision
 // Approximation of the Gamma Function". SIAM Journal on Numerical Analysis
 // series B. Vol. 1:
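MaterializeWithUpcast, called above, is what routes F16 through F32; its implementation is outside this diff. The following is a minimal sketch of what such a helper presumably does, inferred from the convert-in/convert-out pattern visible in the f16 test added below. The body is an assumption, not this repo's code, and ranked tensors are assumed for simplicity.

// Sketch (assumption, not the code in this repo): upcast operands whose
// element type is narrower than min_precision_ty, materialize the
// approximation at the wider type, then truncate the result back.
Value MaterializeWithUpcastSketch(
    ConversionPatternRewriter &rewriter, Location loc, ValueRange operands,
    FloatType min_precision_ty,
    Value (*materialize)(ConversionPatternRewriter &, Location, ValueRange)) {
  auto ty = operands.front().getType().cast<RankedTensorType>();
  auto elem_ty = ty.getElementType().dyn_cast<FloatType>();
  // Nothing to do if the element type is already wide enough.
  if (!elem_ty || elem_ty.getWidth() >= min_precision_ty.getWidth())
    return materialize(rewriter, loc, operands);

  // Widen every operand (f16 -> f32 in this commit).
  SmallVector<Value, 2> upcast_operands;
  for (Value v : operands) {
    auto vty = v.getType().cast<RankedTensorType>();
    upcast_operands.push_back(rewriter.create<mhlo::ConvertOp>(
        loc, RankedTensorType::get(vty.getShape(), min_precision_ty), v));
  }

  // Materialize the approximation at the wide type, then truncate the
  // result back to the original narrow element type.
  Value result = materialize(rewriter, loc, upcast_operands);
  return rewriter.create<mhlo::ConvertOp>(
      loc, RankedTensorType::get(ty.getShape(), elem_ty), result);
}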
@@ -1318,7 +1363,8 @@ void PopulateDecomposeChloPatterns(MLIRContext *context,
 
   // Other patterns.
   // clang-format off
-  patterns->insert<ConvertDigammaOp,
+  patterns->insert<ConvertCoshOp,
+                   ConvertDigammaOp,
                    ConvertErfOp,
                    ConvertErfcOp,
                    ConvertLgammaOp,
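For orientation, a pass driver consuming these patterns might look roughly as follows. Apart from PopulateDecomposeChloPatterns, the names and signatures here are assumptions based on common MLIR idioms of this period, not part of this commit.

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

// Hypothetical driver sketch (namespace qualifiers for the populate
// function omitted): collect the CHLO decomposition patterns, now
// including ConvertCoshOp, and apply them greedily to a function.
void DecomposeChloSketch(mlir::FuncOp func) {
  mlir::MLIRContext *context = func.getContext();
  mlir::OwningRewritePatternList patterns(context);
  PopulateDecomposeChloPatterns(context, &patterns);
  (void)mlir::applyPatternsAndFoldGreedily(func, std::move(patterns));
}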
@@ -255,36 +255,6 @@ def : Pat<(HLOClient_AtanhOp NonComplexElementType:$input),
 def : Pat<(HLOClient_ConjOp $v),
           (HLO_ComplexOp (HLO_RealOp $v), (HLO_NegOp (HLO_ImagOp $v)))>;
 
-// Express `cosh` as
-//   cosh(x) = (e^x + e^-x) / 2
-//           = e^(x + log(1/2)) + e^(-x + log(1/2))
-//
-// The second formulation avoids overflowing when e^x = inf but (e^x)/2 is not.
-//
-// This incorrectly overflows to inf for two f32 input values, namely
-// +/-89.4159851, due to rounding error when computing x +/- log(1/2). The
-// correct answer of 3.40281961e+38 (0x7f7fffec) is very close to max-float, so
-// we deem this acceptable.
-def : Pat<(HLOClient_CoshOp NonComplexElementType:$input),
-  (HLO_AddOp
-    (HLO_ExpOp
-      (HLO_AddOp
-        $input,
-        (HLO_LogOp
-          (HLO_ConstantLike<"0.5"> $input)
-        )
-      )
-    ),
-    (HLO_ExpOp
-      (HLO_AddOp
-        (HLO_NegOp $input),
-        (HLO_LogOp
-          (HLO_ConstantLike<"0.5"> $input)
-        )
-      )
-    )
-  )>;
-
 // Express `is_inf` as
 //   is_inf(x) = is_pos_inf(|x|)
 def : Pat<(HLOClient_IsInfOp NonComplexElementType:$input),
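The overflow rationale in the comment block (removed from the TableGen file but kept in the C++ version above) is easy to verify numerically: since e^(log(1/2)) = 1/2, both formulations equal (e^x + e^-x)/2 exactly, but the naive form overflows f32 as soon as e^x does. A standalone check, independent of this repo:

#include <cmath>
#include <cstdio>

// Checks the claim from the comment block: in f32, the naive
// (e^x + e^-x) / 2 overflows once e^x exceeds max-float (x > ~88.72),
// while e^(x + log(1/2)) + e^(-x + log(1/2)) stays finite up to ~89.4.
int main() {
  const float x = 89.0f;
  const float naive = (std::exp(x) + std::exp(-x)) / 2.0f;  // inf: e^89 ~ 4.5e38
  const float log_half = std::log(0.5f);
  const float stable =
      std::exp(x + log_half) + std::exp(log_half - x);       // ~2.24e38, finite
  std::printf("naive  = %g\n", naive);                       // inf
  std::printf("stable = %g\n", stable);                      // ~2.2448e+38
  std::printf("cosh   = %g\n", std::cosh(x));                // reference value
  return 0;
}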
@@ -2187,3 +2187,32 @@ func @sinh_complex(%x : tensor<2xcomplex<f32>>) -> tensor<2xcomplex<f32>> {
   %1 = chlo.sinh %x : tensor<2xcomplex<f32>> -> tensor<2xcomplex<f32>>
   return %1 : tensor<2xcomplex<f32>>
 }
+
+// -----
+
+// CHECK-LABEL: @cosh_f32
+// CHECK-SAME: (%[[X:.*]]: tensor<f32>)
+func @cosh_f32(%x : tensor<f32>) -> tensor<f32> {
+  // CHECK: %[[HALF:.*]] = mhlo.constant dense<5.000000e-01> : tensor<f32>
+  // CHECK: %[[LOG_HALF:.*]] = "mhlo.log"(%[[HALF]]) : (tensor<f32>) -> tensor<f32>
+  // CHECK: %[[X_PLUS_LOG_HALF:.*]] = mhlo.add %[[X]], %[[LOG_HALF]] : tensor<f32>
+  // CHECK: %[[EXP_1:.*]] = "mhlo.exponential"(%[[X_PLUS_LOG_HALF]]) : (tensor<f32>) -> tensor<f32>
+  // CHECK: %[[LOG_HALF_MINUS_X:.*]] = mhlo.subtract %[[LOG_HALF]], %[[X]] : tensor<f32>
+  // CHECK: %[[EXP_2:.*]] = "mhlo.exponential"(%[[LOG_HALF_MINUS_X]]) : (tensor<f32>) -> tensor<f32>
+  // CHECK: %[[RESULT:.*]] = mhlo.add %[[EXP_1]], %[[EXP_2]] : tensor<f32>
+  // CHECK: return %[[RESULT]] : tensor<f32>
+  %1 = chlo.cosh %x : tensor<f32> -> tensor<f32>
+  return %1 : tensor<f32>
+}
+
+// -----
+
+// CHECK-LABEL: @cosh_f16
+// CHECK-SAME: (%[[ARG0:.*]]: tensor<f16>)
+func @cosh_f16(%x : tensor<f16>) -> tensor<f16> {
+  // CHECK: "mhlo.convert"(%[[ARG0]]) : (tensor<f16>) -> tensor<f32>
+  // CHECK: %[[RES:.*]] = "mhlo.convert"(%{{.*}}) : (tensor<f32>) -> tensor<f16>
+  // CHECK: return %[[RES]]
+  %1 = chlo.cosh %x : tensor<f16> -> tensor<f16>
+  return %1 : tensor<f16>
+}