[MLIR][KernelGen] Add approximation lowering for mhlo.erf operation on f64

PiperOrigin-RevId: 352977456
This commit is contained in:
A. Unique TensorFlower 2021-01-21 02:47:43 -08:00 committed by TensorFlow MLIR Team
parent 5bbe5d348c
commit bec2e625a2
2 changed files with 319 additions and 9 deletions

View File

@ -76,11 +76,12 @@ struct ConvertConstantLikeOp : public OpConversionPattern<ConstantLikeOp> {
}
};
Value MaterializePolynomialApproximation(
ConversionPatternRewriter &rewriter, Location loc, Value x,
const std::vector<float> &coefficients) {
template <typename FTy>
Value MaterializePolynomialApproximation(ConversionPatternRewriter &rewriter,
Location loc, Value x,
const std::vector<FTy> &coefficients) {
Value poly = chlo::getConstantLike(rewriter, loc, 0.0, x);
for (float c : coefficients) {
for (FTy c : coefficients) {
poly = rewriter.create<mhlo::MulOp>(loc, x.getType(), poly, x);
poly = rewriter.create<mhlo::AddOp>(
loc, x.getType(), poly, chlo::getConstantLike(rewriter, loc, c, x));
@ -88,6 +89,142 @@ Value MaterializePolynomialApproximation(
return poly;
}
// Materializes an erfc approximation that requires |x| >= 1. For smaller
// magnitudes the erf approximation has to be used instead.
//
// Every rational approximation below is evaluated at |x|; negative arguments
// are handled at the very end through erfc(x) = 2 - erfc(-x).
// This implementation is based on Cephes.
Value MaterializeErfcApproximationF64ForMagnituteGEOne(
    ConversionPatternRewriter &rewriter, Location loc, Value x) {
  // Threshold applied to z = -x^2 below which the result is replaced by 0.
  const double kMaxlog = 7.09782712893383996843E2;
  // Numerator (P) and denominator (Q) coefficients used for |x| < 8.
  const std::vector<double> kCoeffsP{
      2.46196981473530512524E-10, 5.64189564831068821977E-1,
      7.46321056442269912687E0,   4.86371970985681366614E1,
      1.96520832956077098242E2,   5.26445194995477358631E2,
      9.34528527171957607540E2,   1.02755188689515710272E3,
      5.57535335369399327526E2};
  const std::vector<double> kCoeffsQ{
      1.00000000000000000000E0, 1.32281951154744992508E1,
      8.67072140885989742329E1, 3.54937778887819891062E2,
      9.75708501743205489753E2, 1.82390916687909736289E3,
      2.24633760818710981792E3, 1.65666309194161350182E3,
      5.57535340817727675546E2};
  // Numerator (R) and denominator (S) coefficients used for |x| >= 8.
  const std::vector<double> kCoeffsR{
      5.64189583547755073984E-1, 1.27536670759978104416E0,
      5.01905042251180477414E0,  6.16021097993053585195E0,
      7.40974269950448939160E0,  2.97886665372100240670E0};
  const std::vector<double> kCoeffsS{
      1.00000000000000000000E0, 2.26052863220117276590E0,
      9.39603524938001434673E0, 1.20489539808096656605E1,
      1.70814450747565897222E1, 9.60896809063285878198E0,
      3.36907645100081516050E0};

  // z = -x^2, exp(z), and |x| are shared by both rational approximations.
  Value squared = rewriter.create<mhlo::MulOp>(loc, x, x);
  Value neg_squared = rewriter.create<mhlo::NegOp>(loc, squared);
  Value exp_neg_squared = rewriter.create<mhlo::ExpOp>(loc, neg_squared);
  Value magnitude = rewriter.create<mhlo::AbsOp>(loc, x);

  // Emits exp(z) * Numer(|x|) / Denom(|x|). The ops are created in the order
  // numerator polynomial, scaling multiply, denominator polynomial, divide.
  auto emit_scaled_ratio = [&](const std::vector<double> &numer,
                               const std::vector<double> &denom) -> Value {
    Value numer_poly =
        MaterializePolynomialApproximation(rewriter, loc, magnitude, numer);
    Value scaled_numer =
        rewriter.create<mhlo::MulOp>(loc, exp_neg_squared, numer_poly);
    Value denom_poly =
        MaterializePolynomialApproximation(rewriter, loc, magnitude, denom);
    return rewriter.create<mhlo::DivOp>(loc, scaled_numer, denom_poly);
  };

  // For |x| in [1, 8): erfc(x) = exp(z) P(|x|) / Q(|x|).
  Value ratio_below_8 = emit_scaled_ratio(kCoeffsP, kCoeffsQ);
  // For |x| >= 8: erfc(x) = exp(z) R(|x|) / S(|x|).
  Value ratio_from_8 = emit_scaled_ratio(kCoeffsR, kCoeffsS);

  const StringAttr kLT = rewriter.getStringAttr(
      mhlo::stringifyComparisonDirection(mhlo::ComparisonDirection::LT));
  auto emit_less_than = [&](Value lhs, Value rhs) -> Value {
    return rewriter.create<mhlo::CompareOp>(loc, lhs, rhs, kLT);
  };

  // Choose between the two rational approximations based on |x| < 8.
  Value const_eight = chlo::getConstantLike(rewriter, loc, 8.0, x);
  Value magnitude_below_8 = emit_less_than(magnitude, const_eight);
  Value combined = rewriter.create<mhlo::SelectOp>(
      loc, magnitude_below_8, ratio_below_8, ratio_from_8);

  // Clamp to prevent overflow: for large x (z < -kMaxlog) use erfc(x) = 0.
  Value neg_maxlog = chlo::getConstantLike(rewriter, loc, -kMaxlog, x);
  Value beyond_maxlog = emit_less_than(neg_squared, neg_maxlog);
  Value const_zero = chlo::getConstantLike(rewriter, loc, 0.0, x);
  Value clamped = rewriter.create<mhlo::SelectOp>(loc, beyond_maxlog,
                                                  const_zero, combined);

  // For negative x, reuse the value computed at |x|: erfc(x) = 2 - erfc(-x).
  Value is_negative = emit_less_than(x, const_zero);
  Value const_two = chlo::getConstantLike(rewriter, loc, 2.0, x);
  Value reflected = rewriter.create<mhlo::SubOp>(loc, const_two, clamped);
  return rewriter.create<mhlo::SelectOp>(loc, is_negative, reflected, clamped);
}
// Materializes an erf approximation that requires |x| <= 1. For larger
// magnitudes the erfc approximation has to be used instead.
// This implementation is based on Cephes.
Value MaterializeErfApproximationF64ForMagnituteLEOne(
    ConversionPatternRewriter &rewriter, Location loc, Value x) {
  // Numerator (T) and denominator (U) coefficients; both polynomials are
  // evaluated at x^2.
  const std::vector<double> kCoeffsT{
      9.60497373987051638749E0, 9.00260197203842689217E1,
      2.23200534594684319226E3, 7.00332514112805075473E3,
      5.55923013010394962768E4};
  const std::vector<double> kCoeffsU{
      1.00000000000000000000E0, 3.35617141647503099647E1,
      5.21357949780152679795E2, 4.59432382970980127987E3,
      2.26290000613890934246E4, 4.92673942608635921086E4};

  // erf(x) = x T(x^2) / U(x^2).
  Value squared = rewriter.create<mhlo::MulOp>(loc, x, x);
  Value numer_poly =
      MaterializePolynomialApproximation(rewriter, loc, squared, kCoeffsT);
  Value numer = rewriter.create<mhlo::MulOp>(loc, x, numer_poly);
  Value denom =
      MaterializePolynomialApproximation(rewriter, loc, squared, kCoeffsU);
  return rewriter.create<mhlo::DivOp>(loc, numer, denom);
}
// Materializes an erf approximation for f64 operands by emitting both the
// small-magnitude and the erfc-based candidate and selecting between them.
// This implementation is based on Cephes.
Value MaterializeErfApproximationF64(ConversionPatternRewriter &rewriter,
                                     Location loc, Value x) {
  assert(x.getType().cast<RankedTensorType>().getElementType().isF64() &&
         "expect f64 element type");

  // Candidate for |x| < 1: erf(x) = erf_approx(x).
  Value small_arg_result =
      MaterializeErfApproximationF64ForMagnituteLEOne(rewriter, loc, x);

  // Candidate for |x| >= 1: erf(x) = 1 - erfc_approx(x).
  Value const_one = chlo::getConstantLike(rewriter, loc, 1.0, x);
  Value erfc_result =
      MaterializeErfcApproximationF64ForMagnituteGEOne(rewriter, loc, x);
  Value large_arg_result =
      rewriter.create<mhlo::SubOp>(loc, const_one, erfc_result);

  // Pick the candidate that is valid for the argument, based on |x| < 1.
  Value magnitude = rewriter.create<mhlo::AbsOp>(loc, x);
  const StringAttr less_than = rewriter.getStringAttr(
      mhlo::stringifyComparisonDirection(mhlo::ComparisonDirection::LT));
  Value use_small_arg =
      rewriter.create<mhlo::CompareOp>(loc, magnitude, const_one, less_than);
  return rewriter.create<mhlo::SelectOp>(loc, use_small_arg, small_arg_result,
                                         large_arg_result);
}
// This is the same approximation as used in Eigen.
Value MaterializeErfApproximationF32(ConversionPatternRewriter &rewriter,
Location loc, Value operand) {
assert(operand.getType().cast<RankedTensorType>().getElementType().isF32() &&
@ -109,13 +246,14 @@ Value MaterializeErfApproximationF32(ConversionPatternRewriter &rewriter,
rewriter.create<mhlo::ClampOp>(loc, operand.getType(), lb, operand, ub);
Value x_sq = rewriter.create<mhlo::MulOp>(loc, x, x);
// Materialize polynomial approximation for x in [-4, 4].
// Materialize polynomial approximation for x in [-4, 4] as
// erf(x) = x * Alpha(x^2) / Beta(x^2).
Value alpha_poly =
MaterializePolynomialApproximation(rewriter, loc, x_sq, kAlpha);
Value beta_poly =
MaterializePolynomialApproximation(rewriter, loc, x_sq, kBeta);
Value mul_x_alpha_poly = rewriter.create<mhlo::MulOp>(loc, x, alpha_poly);
return rewriter.create<mhlo::DivOp>(loc, mul_x_alpha_poly, beta_poly);
Value x_mul_alpha_poly = rewriter.create<mhlo::MulOp>(loc, x, alpha_poly);
return rewriter.create<mhlo::DivOp>(loc, x_mul_alpha_poly, beta_poly);
}
struct ConvertErfOp : public OpConversionPattern<ErfOp> {
@ -128,8 +266,13 @@ struct ConvertErfOp : public OpConversionPattern<ErfOp> {
Value x = transformed.operand();
Type ty = x.getType().cast<RankedTensorType>().getElementType();
// For now, we support only f32 and f16.
if (!ty.isF32() && !ty.isF16()) return failure();
// For now, we support only f64, f32, and f16.
if (!ty.isF64() && !ty.isF32() && !ty.isF16()) return failure();
if (ty.isF64()) {
rewriter.replaceOp(op, MaterializeErfApproximationF64(rewriter, loc, x));
return success();
}
// Cast argument to f32 tensor if needed.
assert((ty.isF16() || ty.isF32()) && "expect f16 or f32 at this point");

View File

@ -110,6 +110,173 @@ func @conj(%arg0: tensor<3xcomplex<f32>>) -> tensor<3xcomplex<f32>> {
return %1 : tensor<3xcomplex<f32>>
}
// Verifies the lowering of chlo.erf on a tensor<f64> operand. The expected ops
// are matched in emission order: the rational approximation x * T(x^2) / U(x^2)
// for |x| < 1, then the erfc-based approximations exp(-x^2) * P(|x|) / Q(|x|)
// and exp(-x^2) * R(|x|) / S(|x|), their combination, and the final select.
// CHECK-LABEL: @erf_f64
// CHECK-SAME: %[[ARG:.*]]: tensor<f64>
func @erf_f64(%arg : tensor<f64>) -> tensor<f64> {
// x^2 followed by the numerator polynomial T(x^2), multiplied by x.
// CHECK: %[[TMP_0:.*]] = mhlo.multiply %[[ARG]], %[[ARG]]
// CHECK: %[[TMP_1:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_2:.*]] = mhlo.multiply %[[TMP_1]], %[[TMP_0]]
// CHECK: %[[TMP_3:.*]] = mhlo.constant dense<9.6049737398705161>
// CHECK: %[[TMP_4:.*]] = mhlo.add %[[TMP_2]], %[[TMP_3]]
// CHECK: %[[TMP_5:.*]] = mhlo.multiply %[[TMP_4]], %[[TMP_0]]
// CHECK: %[[TMP_6:.*]] = mhlo.constant dense<90.026019720384269>
// CHECK: %[[TMP_7:.*]] = mhlo.add %[[TMP_5]], %[[TMP_6]]
// CHECK: %[[TMP_8:.*]] = mhlo.multiply %[[TMP_7]], %[[TMP_0]]
// CHECK: %[[TMP_9:.*]] = mhlo.constant dense<2232.0053459468431>
// CHECK: %[[TMP_10:.*]] = mhlo.add %[[TMP_8]], %[[TMP_9]]
// CHECK: %[[TMP_11:.*]] = mhlo.multiply %[[TMP_10]], %[[TMP_0]]
// CHECK: %[[TMP_12:.*]] = mhlo.constant dense<7003.3251411280507>
// CHECK: %[[TMP_13:.*]] = mhlo.add %[[TMP_11]], %[[TMP_12]]
// CHECK: %[[TMP_14:.*]] = mhlo.multiply %[[TMP_13]], %[[TMP_0]]
// CHECK: %[[TMP_15:.*]] = mhlo.constant dense<55592.301301039493>
// CHECK: %[[TMP_16:.*]] = mhlo.add %[[TMP_14]], %[[TMP_15]]
// CHECK: %[[TMP_17:.*]] = mhlo.multiply %[[ARG]], %[[TMP_16]]
// Denominator polynomial U(x^2) and the resulting quotient.
// CHECK: %[[TMP_18:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_19:.*]] = mhlo.multiply %[[TMP_18]], %[[TMP_0]]
// CHECK: %[[TMP_20:.*]] = mhlo.constant dense<1.000000e+00>
// CHECK: %[[TMP_21:.*]] = mhlo.add %[[TMP_19]], %[[TMP_20]]
// CHECK: %[[TMP_22:.*]] = mhlo.multiply %[[TMP_21]], %[[TMP_0]]
// CHECK: %[[TMP_23:.*]] = mhlo.constant dense<33.561714164750313>
// CHECK: %[[TMP_24:.*]] = mhlo.add %[[TMP_22]], %[[TMP_23]]
// CHECK: %[[TMP_25:.*]] = mhlo.multiply %[[TMP_24]], %[[TMP_0]]
// CHECK: %[[TMP_26:.*]] = mhlo.constant dense<521.35794978015269>
// CHECK: %[[TMP_27:.*]] = mhlo.add %[[TMP_25]], %[[TMP_26]]
// CHECK: %[[TMP_28:.*]] = mhlo.multiply %[[TMP_27]], %[[TMP_0]]
// CHECK: %[[TMP_29:.*]] = mhlo.constant dense<4594.3238297098014>
// CHECK: %[[TMP_30:.*]] = mhlo.add %[[TMP_28]], %[[TMP_29]]
// CHECK: %[[TMP_31:.*]] = mhlo.multiply %[[TMP_30]], %[[TMP_0]]
// CHECK: %[[TMP_32:.*]] = mhlo.constant dense<22629.000061389095>
// CHECK: %[[TMP_33:.*]] = mhlo.add %[[TMP_31]], %[[TMP_32]]
// CHECK: %[[TMP_34:.*]] = mhlo.multiply %[[TMP_33]], %[[TMP_0]]
// CHECK: %[[TMP_35:.*]] = mhlo.constant dense<49267.394260863592>
// CHECK: %[[TMP_36:.*]] = mhlo.add %[[TMP_34]], %[[TMP_35]]
// CHECK: %[[TMP_37:.*]] = mhlo.divide %[[TMP_17]], %[[TMP_36]]
// Constant 1.0 and the shared erfc inputs: -x^2, exp(-x^2), and |x|.
// CHECK: %[[TMP_38:.*]] = mhlo.constant dense<1.000000e+00>
// CHECK: %[[TMP_39:.*]] = mhlo.multiply %[[ARG]], %[[ARG]]
// CHECK: %[[TMP_40:.*]] = "mhlo.negate"(%[[TMP_39]])
// CHECK: %[[TMP_41:.*]] = "mhlo.exponential"(%[[TMP_40]])
// CHECK: %[[TMP_42:.*]] = "mhlo.abs"(%[[ARG]])
// Polynomial P(|x|), multiplied by exp(-x^2).
// CHECK: %[[TMP_43:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_44:.*]] = mhlo.multiply %[[TMP_43]], %[[TMP_42]]
// CHECK: %[[TMP_45:.*]] = mhlo.constant dense<2.4619698147353052E-10>
// CHECK: %[[TMP_46:.*]] = mhlo.add %[[TMP_44]], %[[TMP_45]]
// CHECK: %[[TMP_47:.*]] = mhlo.multiply %[[TMP_46]], %[[TMP_42]]
// CHECK: %[[TMP_48:.*]] = mhlo.constant dense<0.56418956483106886>
// CHECK: %[[TMP_49:.*]] = mhlo.add %[[TMP_47]], %[[TMP_48]]
// CHECK: %[[TMP_50:.*]] = mhlo.multiply %[[TMP_49]], %[[TMP_42]]
// CHECK: %[[TMP_51:.*]] = mhlo.constant dense<7.4632105644226989>
// CHECK: %[[TMP_52:.*]] = mhlo.add %[[TMP_50]], %[[TMP_51]]
// CHECK: %[[TMP_53:.*]] = mhlo.multiply %[[TMP_52]], %[[TMP_42]]
// CHECK: %[[TMP_54:.*]] = mhlo.constant dense<48.637197098568137>
// CHECK: %[[TMP_55:.*]] = mhlo.add %[[TMP_53]], %[[TMP_54]]
// CHECK: %[[TMP_56:.*]] = mhlo.multiply %[[TMP_55]], %[[TMP_42]]
// CHECK: %[[TMP_57:.*]] = mhlo.constant dense<196.5208329560771>
// CHECK: %[[TMP_58:.*]] = mhlo.add %[[TMP_56]], %[[TMP_57]]
// CHECK: %[[TMP_59:.*]] = mhlo.multiply %[[TMP_58]], %[[TMP_42]]
// CHECK: %[[TMP_60:.*]] = mhlo.constant dense<526.44519499547732>
// CHECK: %[[TMP_61:.*]] = mhlo.add %[[TMP_59]], %[[TMP_60]]
// CHECK: %[[TMP_62:.*]] = mhlo.multiply %[[TMP_61]], %[[TMP_42]]
// CHECK: %[[TMP_63:.*]] = mhlo.constant dense<934.52852717195765>
// CHECK: %[[TMP_64:.*]] = mhlo.add %[[TMP_62]], %[[TMP_63]]
// CHECK: %[[TMP_65:.*]] = mhlo.multiply %[[TMP_64]], %[[TMP_42]]
// CHECK: %[[TMP_66:.*]] = mhlo.constant dense<1027.5518868951572>
// CHECK: %[[TMP_67:.*]] = mhlo.add %[[TMP_65]], %[[TMP_66]]
// CHECK: %[[TMP_68:.*]] = mhlo.multiply %[[TMP_67]], %[[TMP_42]]
// CHECK: %[[TMP_69:.*]] = mhlo.constant dense<557.53533536939938>
// CHECK: %[[TMP_70:.*]] = mhlo.add %[[TMP_68]], %[[TMP_69]]
// CHECK: %[[TMP_71:.*]] = mhlo.multiply %[[TMP_41]], %[[TMP_70]]
// Polynomial Q(|x|) and the quotient used for |x| < 8.
// CHECK: %[[TMP_72:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_73:.*]] = mhlo.multiply %[[TMP_72]], %[[TMP_42]]
// CHECK: %[[TMP_74:.*]] = mhlo.constant dense<1.000000e+00>
// CHECK: %[[TMP_75:.*]] = mhlo.add %[[TMP_73]], %[[TMP_74]]
// CHECK: %[[TMP_76:.*]] = mhlo.multiply %[[TMP_75]], %[[TMP_42]]
// CHECK: %[[TMP_77:.*]] = mhlo.constant dense<13.228195115474499>
// CHECK: %[[TMP_78:.*]] = mhlo.add %[[TMP_76]], %[[TMP_77]]
// CHECK: %[[TMP_79:.*]] = mhlo.multiply %[[TMP_78]], %[[TMP_42]]
// CHECK: %[[TMP_80:.*]] = mhlo.constant dense<86.707214088598973>
// CHECK: %[[TMP_81:.*]] = mhlo.add %[[TMP_79]], %[[TMP_80]]
// CHECK: %[[TMP_82:.*]] = mhlo.multiply %[[TMP_81]], %[[TMP_42]]
// CHECK: %[[TMP_83:.*]] = mhlo.constant dense<354.93777888781989>
// CHECK: %[[TMP_84:.*]] = mhlo.add %[[TMP_82]], %[[TMP_83]]
// CHECK: %[[TMP_85:.*]] = mhlo.multiply %[[TMP_84]], %[[TMP_42]]
// CHECK: %[[TMP_86:.*]] = mhlo.constant dense<975.70850174320549>
// CHECK: %[[TMP_87:.*]] = mhlo.add %[[TMP_85]], %[[TMP_86]]
// CHECK: %[[TMP_88:.*]] = mhlo.multiply %[[TMP_87]], %[[TMP_42]]
// CHECK: %[[TMP_89:.*]] = mhlo.constant dense<1823.9091668790973>
// CHECK: %[[TMP_90:.*]] = mhlo.add %[[TMP_88]], %[[TMP_89]]
// CHECK: %[[TMP_91:.*]] = mhlo.multiply %[[TMP_90]], %[[TMP_42]]
// CHECK: %[[TMP_92:.*]] = mhlo.constant dense<2246.3376081871097>
// CHECK: %[[TMP_93:.*]] = mhlo.add %[[TMP_91]], %[[TMP_92]]
// CHECK: %[[TMP_94:.*]] = mhlo.multiply %[[TMP_93]], %[[TMP_42]]
// CHECK: %[[TMP_95:.*]] = mhlo.constant dense<1656.6630919416134>
// CHECK: %[[TMP_96:.*]] = mhlo.add %[[TMP_94]], %[[TMP_95]]
// CHECK: %[[TMP_97:.*]] = mhlo.multiply %[[TMP_96]], %[[TMP_42]]
// CHECK: %[[TMP_98:.*]] = mhlo.constant dense<557.53534081772773>
// CHECK: %[[TMP_99:.*]] = mhlo.add %[[TMP_97]], %[[TMP_98]]
// CHECK: %[[TMP_100:.*]] = mhlo.divide %[[TMP_71]], %[[TMP_99]]
// Polynomial R(|x|), multiplied by exp(-x^2).
// CHECK: %[[TMP_101:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_102:.*]] = mhlo.multiply %[[TMP_101]], %[[TMP_42]]
// CHECK: %[[TMP_103:.*]] = mhlo.constant dense<0.56418958354775506>
// CHECK: %[[TMP_104:.*]] = mhlo.add %[[TMP_102]], %[[TMP_103]]
// CHECK: %[[TMP_105:.*]] = mhlo.multiply %[[TMP_104]], %[[TMP_42]]
// CHECK: %[[TMP_106:.*]] = mhlo.constant dense<1.275366707599781>
// CHECK: %[[TMP_107:.*]] = mhlo.add %[[TMP_105]], %[[TMP_106]]
// CHECK: %[[TMP_108:.*]] = mhlo.multiply %[[TMP_107]], %[[TMP_42]]
// CHECK: %[[TMP_109:.*]] = mhlo.constant dense<5.0190504225118051>
// CHECK: %[[TMP_110:.*]] = mhlo.add %[[TMP_108]], %[[TMP_109]]
// CHECK: %[[TMP_111:.*]] = mhlo.multiply %[[TMP_110]], %[[TMP_42]]
// CHECK: %[[TMP_112:.*]] = mhlo.constant dense<6.160210979930536>
// CHECK: %[[TMP_113:.*]] = mhlo.add %[[TMP_111]], %[[TMP_112]]
// CHECK: %[[TMP_114:.*]] = mhlo.multiply %[[TMP_113]], %[[TMP_42]]
// CHECK: %[[TMP_115:.*]] = mhlo.constant dense<7.4097426995044895>
// CHECK: %[[TMP_116:.*]] = mhlo.add %[[TMP_114]], %[[TMP_115]]
// CHECK: %[[TMP_117:.*]] = mhlo.multiply %[[TMP_116]], %[[TMP_42]]
// CHECK: %[[TMP_118:.*]] = mhlo.constant dense<2.9788666537210022>
// CHECK: %[[TMP_119:.*]] = mhlo.add %[[TMP_117]], %[[TMP_118]]
// CHECK: %[[TMP_120:.*]] = mhlo.multiply %[[TMP_41]], %[[TMP_119]]
// Polynomial S(|x|) and the quotient used for |x| >= 8.
// CHECK: %[[TMP_121:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_122:.*]] = mhlo.multiply %[[TMP_121]], %[[TMP_42]]
// CHECK: %[[TMP_123:.*]] = mhlo.constant dense<1.000000e+00>
// CHECK: %[[TMP_124:.*]] = mhlo.add %[[TMP_122]], %[[TMP_123]]
// CHECK: %[[TMP_125:.*]] = mhlo.multiply %[[TMP_124]], %[[TMP_42]]
// CHECK: %[[TMP_126:.*]] = mhlo.constant dense<2.2605286322011726>
// CHECK: %[[TMP_127:.*]] = mhlo.add %[[TMP_125]], %[[TMP_126]]
// CHECK: %[[TMP_128:.*]] = mhlo.multiply %[[TMP_127]], %[[TMP_42]]
// CHECK: %[[TMP_129:.*]] = mhlo.constant dense<9.3960352493800147>
// CHECK: %[[TMP_130:.*]] = mhlo.add %[[TMP_128]], %[[TMP_129]]
// CHECK: %[[TMP_131:.*]] = mhlo.multiply %[[TMP_130]], %[[TMP_42]]
// CHECK: %[[TMP_132:.*]] = mhlo.constant dense<12.048953980809666>
// CHECK: %[[TMP_133:.*]] = mhlo.add %[[TMP_131]], %[[TMP_132]]
// CHECK: %[[TMP_134:.*]] = mhlo.multiply %[[TMP_133]], %[[TMP_42]]
// CHECK: %[[TMP_135:.*]] = mhlo.constant dense<17.081445074756591>
// CHECK: %[[TMP_136:.*]] = mhlo.add %[[TMP_134]], %[[TMP_135]]
// CHECK: %[[TMP_137:.*]] = mhlo.multiply %[[TMP_136]], %[[TMP_42]]
// CHECK: %[[TMP_138:.*]] = mhlo.constant dense<9.6089680906328585>
// CHECK: %[[TMP_139:.*]] = mhlo.add %[[TMP_137]], %[[TMP_138]]
// CHECK: %[[TMP_140:.*]] = mhlo.multiply %[[TMP_139]], %[[TMP_42]]
// CHECK: %[[TMP_141:.*]] = mhlo.constant dense<3.3690764510008151>
// CHECK: %[[TMP_142:.*]] = mhlo.add %[[TMP_140]], %[[TMP_141]]
// CHECK: %[[TMP_143:.*]] = mhlo.divide %[[TMP_120]], %[[TMP_142]]
// Select on |x| < 8, clamp to 0 when -x^2 is below the threshold, and use
// 2 - erfc for negative arguments.
// CHECK: %[[TMP_144:.*]] = mhlo.constant dense<8.000000e+00>
// CHECK: %[[TMP_145:.*]] = "mhlo.compare"(%[[TMP_42]], %[[TMP_144]]) {comparison_direction = "LT"}
// CHECK: %[[TMP_146:.*]] = "mhlo.select"(%[[TMP_145]], %[[TMP_100]], %[[TMP_143]])
// CHECK: %[[TMP_147:.*]] = mhlo.constant dense<-709.78271289338397>
// CHECK: %[[TMP_148:.*]] = "mhlo.compare"(%[[TMP_40]], %[[TMP_147]]) {comparison_direction = "LT"}
// CHECK: %[[TMP_149:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[TMP_150:.*]] = "mhlo.select"(%[[TMP_148]], %[[TMP_149]], %[[TMP_146]])
// CHECK: %[[TMP_152:.*]] = "mhlo.compare"(%[[ARG]], %[[TMP_149]]) {comparison_direction = "LT"}
// CHECK: %[[TMP_153:.*]] = mhlo.constant dense<2.000000e+00>
// CHECK: %[[TMP_154:.*]] = mhlo.subtract %[[TMP_153]], %[[TMP_150]]
// CHECK: %[[TMP_155:.*]] = "mhlo.select"(%[[TMP_152]], %[[TMP_154]], %[[TMP_150]])
// 1 - erfc, then the final select between both candidates on |x| < 1.
// CHECK: %[[TMP_156:.*]] = mhlo.subtract %[[TMP_38]], %[[TMP_155]]
// CHECK: %[[TMP_157:.*]] = "mhlo.abs"(%[[ARG]])
// CHECK: %[[TMP_159:.*]] = "mhlo.compare"(%[[TMP_157]], %[[TMP_38]]) {comparison_direction = "LT"}
// CHECK: %[[RESULT:.*]] = "mhlo.select"(%[[TMP_159]], %[[TMP_37]], %[[TMP_156]])
// CHECK: return %[[RESULT]]
%1 = "chlo.erf"(%arg) : (tensor<f64>) -> tensor<f64>
return %1 : tensor<f64>
}
// CHECK-LABEL: @erf_f32
// CHECK-SAME: %[[ARG:.*]]: tensor<f32>
func @erf_f32(%arg : tensor<f32>) -> tensor<f32> {