[Mlir-commits] [mlir] [MLIR][Math] Add erfc to math dialect (PR #126439)

Tue Feb 18 07:08:25 PST 2025

https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/126439

>From 79986db4466199f391a8bcd8796fe0146ce23ab7 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sun, 9 Feb 2025 15:12:01 -0500
Subject: [PATCH 1/9] [MLIR][Math] Add erfc to math dialect

This patch adds the erfc op to the math dialect. It also lowers the math.erfc
op to libm calls. There is also an f32 polynomial approximation for the
function based on
https://stackoverflow.com/questions/35966695/vectorizable-implementation-of-complementary-error-function-erfcf
This is in turn based on
M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of
(1+2x)exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. 36,
No. 153, January 1981, pp. 249-253.
The code has a ULP error less than 3, which was tested, and MLIR test values
were verified against the C implementation.
---
 mlir/include/mlir/Dialect/Math/IR/MathOps.td  |  22 ++++
 .../Dialect/Math/Transforms/Approximation.h   |   8 ++
 .../mlir/Dialect/Math/Transforms/Passes.h     |   1 +
 mlir/lib/Conversion/MathToLibm/MathToLibm.cpp |   1 +
 mlir/lib/Dialect/Math/IR/MathOps.cpp          |  18 +++
 .../Transforms/PolynomialApproximation.cpp    | 121 ++++++++++++++++--
 .../Math/polynomial-approximation.mlir        | 112 ++++++++++++++++
 .../mlir-runner/math-polynomial-approx.mlir   |  72 +++++++++++
 mlir/utils/vim/syntax/mlir.vim                |   1 +
 9 files changed, 348 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
index 8a277320e2f91..87b68540c85dd 100644
--- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td
+++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
@@ -560,6 +560,28 @@ def Math_ErfOp : Math_FloatUnaryOp<"erf"> {
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// ErfcOp
+//===----------------------------------------------------------------------===//
+
+def Math_ErfcOp : Math_FloatUnaryOp<"erfc"> {
+  let summary = "complementary error function of the specified value";
+  let description = [{
+    The `erfc` operation computes the complementary error function.
+    It takes one operand of floating point type (i.e., scalar, tensor or
+    vector) and returns one result of the same type.
+    It has no standard attributes.
+
+    Example:
+
+    ```mlir
+    // Scalar complementary error function value.
+    %a = math.erfc %b : f64
+    ```
+  }];
+  let hasFolder = 1;
+}
+
 
 //===----------------------------------------------------------------------===//
 // ExpOp
diff --git a/mlir/include/mlir/Dialect/Math/Transforms/Approximation.h b/mlir/include/mlir/Dialect/Math/Transforms/Approximation.h
index b4ebc2f0f8fcd..ecfdb71817dff 100644
--- a/mlir/include/mlir/Dialect/Math/Transforms/Approximation.h
+++ b/mlir/include/mlir/Dialect/Math/Transforms/Approximation.h
@@ -23,6 +23,14 @@ struct ErfPolynomialApproximation : public OpRewritePattern<math::ErfOp> {
                                 PatternRewriter &rewriter) const final;
 };
 
+struct ErfcPolynomialApproximation : public OpRewritePattern<math::ErfcOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(math::ErfcOp op,
+                                PatternRewriter &rewriter) const final;
+};
+
 } // namespace math
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/Math/Transforms/Passes.h b/mlir/include/mlir/Dialect/Math/Transforms/Passes.h
index ea7a556297a76..9adc1c6940a15 100644
--- a/mlir/include/mlir/Dialect/Math/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Math/Transforms/Passes.h
@@ -47,6 +47,7 @@ struct MathPolynomialApproximationOptions {
 
 void populatePolynomialApproximateTanhPattern(RewritePatternSet &patterns);
 void populatePolynomialApproximateErfPattern(RewritePatternSet &patterns);
+void populatePolynomialApproximateErfcPattern(RewritePatternSet &patterns);
 
 // Adds patterns to convert to f32 around math functions for which `predicate`
 // returns true.
diff --git a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
index c21ee9652b499..c4792884eb34a 100644
--- a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
+++ b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
@@ -181,6 +181,7 @@ void mlir::populateMathToLibmConversionPatterns(RewritePatternSet &patterns,
   populatePatternsForOp<math::CosOp>(patterns, benefit, ctx, "cosf", "cos");
   populatePatternsForOp<math::CoshOp>(patterns, benefit, ctx, "coshf", "cosh");
   populatePatternsForOp<math::ErfOp>(patterns, benefit, ctx, "erff", "erf");
+  populatePatternsForOp<math::ErfcOp>(patterns, benefit, ctx, "erfcf", "erfc");
   populatePatternsForOp<math::ExpOp>(patterns, benefit, ctx, "expf", "exp");
   populatePatternsForOp<math::Exp2Op>(patterns, benefit, ctx, "exp2f", "exp2");
   populatePatternsForOp<math::ExpM1Op>(patterns, benefit, ctx, "expm1f",
diff --git a/mlir/lib/Dialect/Math/IR/MathOps.cpp b/mlir/lib/Dialect/Math/IR/MathOps.cpp
index 42e357c012739..40e60e80abfbf 100644
--- a/mlir/lib/Dialect/Math/IR/MathOps.cpp
+++ b/mlir/lib/Dialect/Math/IR/MathOps.cpp
@@ -332,6 +332,24 @@ OpFoldResult math::ErfOp::fold(FoldAdaptor adaptor) {
       });
 }
 
+//===----------------------------------------------------------------------===//
+// ErfcOp folder
+//===----------------------------------------------------------------------===//
+
+OpFoldResult math::ErfcOp::fold(FoldAdaptor adaptor) {
+  return constFoldUnaryOpConditional<FloatAttr>(
+      adaptor.getOperands(), [](const APFloat &a) -> std::optional<APFloat> {
+        switch (a.getSizeInBits(a.getSemantics())) {
+        case 64:
+          return APFloat(erfc(a.convertToDouble()));
+        case 32:
+          return APFloat(erfcf(a.convertToFloat()));
+        default:
+          return {};
+        }
+      });
+}
+
 //===----------------------------------------------------------------------===//
 // IPowIOp folder
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 777427de9465c..29e7f3d61caa2 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -173,6 +173,10 @@ handleMultidimensionalVectors(ImplicitLocOpBuilder &builder,
 // Helper functions to create constants.
 //----------------------------------------------------------------------------//
 
+static Value boolCst(ImplicitLocOpBuilder &builder, bool value) {
+  return builder.create<arith::ConstantOp>(builder.getBoolAttr(value));
+}
+
 static Value floatCst(ImplicitLocOpBuilder &builder, float value,
                       Type elementType) {
   assert((elementType.isF16() || elementType.isF32()) &&
@@ -1118,12 +1122,102 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
   return success();
 }
 
+// Approximates erfc(x) with a degree-9 polynomial in q = (|x| - 2) / (|x| + 2).
+LogicalResult
+ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
+                                             PatternRewriter &rewriter) const {
+  Value x = op.getOperand();
+  Type et = getElementTypeOrSelf(x);
+
+  if (!et.isF32())
+    return rewriter.notifyMatchFailure(op, "only f32 type is supported.");
+  std::optional<VectorShape> shape = vectorShape(x);
+
+  ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
+  auto bcast = [&](Value value) -> Value {
+    return broadcast(builder, value, shape);
+  };
+
+  Value trueValue = bcast(boolCst(builder, true));
+  Value zero = bcast(floatCst(builder, 0.0f, et));
+  Value one = bcast(floatCst(builder, 1.0f, et));
+  Value onehalf = bcast(floatCst(builder, 0.5f, et));
+  Value neg4 = bcast(floatCst(builder, -4.0f, et));
+  Value neg2 = bcast(floatCst(builder, -2.0f, et));
+  Value pos2 = bcast(floatCst(builder, 2.0f, et));
+  Value posInf = bcast(f32FromBits(builder, 0x7f800000u));
+  Value clampVal = bcast(floatCst(builder, 10.0546875f, et));
+
+  // Get abs(x)
+  Value isNegativeArg =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, x, zero);
+  Value negArg = builder.create<arith::NegFOp>(x);
+  Value a = builder.create<arith::SelectOp>(isNegativeArg, negArg, x);
+  Value p = builder.create<arith::AddFOp>(a, pos2);
+  Value r = builder.create<arith::DivFOp>(one, p);
+  Value q = builder.create<math::FmaOp>(neg4, r, one);
+  Value t = builder.create<math::FmaOp>(builder.create<arith::AddFOp>(q, one),
+                                        neg2, a);
+  Value e = builder.create<math::FmaOp>(builder.create<arith::NegFOp>(a), q, t);
+  q = builder.create<math::FmaOp>(r, e, q);
+
+  p = bcast(floatCst(builder, -0x1.a4a000p-12f, et));        // -4.01139259e-4
+  Value c1 = bcast(floatCst(builder, -0x1.42a260p-10f, et)); // -1.23075210e-3
+  p = builder.create<math::FmaOp>(p, q, c1);
+  Value c2 = bcast(floatCst(builder, 0x1.585714p-10f, et)); //  1.31355342e-3
+  p = builder.create<math::FmaOp>(p, q, c2);
+  Value c3 = bcast(floatCst(builder, 0x1.1adcc4p-07f, et)); // 8.63227434e-3
+  p = builder.create<math::FmaOp>(p, q, c3);
+  Value c4 = bcast(floatCst(builder, -0x1.081b82p-07f, et)); // -8.05991981e-3
+  p = builder.create<math::FmaOp>(p, q, c4);
+  Value c5 = bcast(floatCst(builder, -0x1.bc0b6ap-05f, et)); // -5.42046614e-2
+  p = builder.create<math::FmaOp>(p, q, c5);
+  Value c6 = bcast(floatCst(builder, 0x1.4ffc46p-03f, et)); //  1.64055392e-1
+  p = builder.create<math::FmaOp>(p, q, c6);
+  Value c7 = bcast(floatCst(builder, -0x1.540840p-03f, et)); // -1.66031361e-1
+  p = builder.create<math::FmaOp>(p, q, c7);
+  Value c8 = bcast(floatCst(builder, -0x1.7bf616p-04f, et)); // -9.27639827e-2
+  p = builder.create<math::FmaOp>(p, q, c8);
+  Value c9 = bcast(floatCst(builder, 0x1.1ba03ap-02f, et)); // 2.76978403e-1
+  p = builder.create<math::FmaOp>(p, q, c9);
+
+  Value d = builder.create<math::FmaOp>(pos2, a, one);
+  r = builder.create<arith::DivFOp>(one, d);
+  q = builder.create<math::FmaOp>(p, r, r);
+  e = builder.create<math::FmaOp>(
+      builder.create<math::FmaOp>(q, builder.create<arith::NegFOp>(a), onehalf),
+      pos2, builder.create<arith::SubFOp>(p, q));
+  r = builder.create<math::FmaOp>(e, r, q);
+
+  Value s = builder.create<arith::MulFOp>(a, a);
+  e = builder.create<math::ExpOp>(builder.create<arith::NegFOp>(s));
+
+  t = builder.create<math::FmaOp>(builder.create<arith::NegFOp>(a), a, s);
+  r = builder.create<math::FmaOp>(
+      r, e,
+      builder.create<arith::MulFOp>(builder.create<arith::MulFOp>(r, e), t));
+
+  Value isNotLessThanInf = builder.create<arith::XOrIOp>(
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, a, posInf),
+      trueValue);
+  r = builder.create<arith::SelectOp>(isNotLessThanInf,
+                                      builder.create<arith::AddFOp>(x, x), r);
+  Value isGreaterThanClamp =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, a, clampVal);
+  r = builder.create<arith::SelectOp>(isGreaterThanClamp, zero, r);
+
+  Value isNegative =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, x, zero);
+  r = builder.create<arith::SelectOp>(
+      isNegative, builder.create<arith::SubFOp>(pos2, r), r);
+
+  rewriter.replaceOp(op, r);
+  return success();
+}
 //----------------------------------------------------------------------------//
 // Exp approximation.
 //----------------------------------------------------------------------------//
-
 namespace {
-
 Value clampWithNormals(ImplicitLocOpBuilder &builder,
                        const std::optional<VectorShape> shape, Value value,
                        float lowerBound, float upperBound) {
@@ -1667,6 +1761,11 @@ void mlir::populatePolynomialApproximateErfPattern(
   patterns.add<ErfPolynomialApproximation>(patterns.getContext());
 }
 
+void mlir::populatePolynomialApproximateErfcPattern(
+    RewritePatternSet &patterns) {
+  patterns.add<ErfcPolynomialApproximation>(patterns.getContext());
+}
+
 template <typename OpType>
 static void
 populateMathF32ExpansionPattern(RewritePatternSet &patterns,
@@ -1690,6 +1789,7 @@ void mlir::populateMathF32ExpansionPatterns(
   populateMathF32ExpansionPattern<math::CosOp>(patterns, predicate);
   populateMathF32ExpansionPattern<math::CoshOp>(patterns, predicate);
   populateMathF32ExpansionPattern<math::ErfOp>(patterns, predicate);
+  populateMathF32ExpansionPattern<math::ErfcOp>(patterns, predicate);
   populateMathF32ExpansionPattern<math::ExpOp>(patterns, predicate);
   populateMathF32ExpansionPattern<math::Exp2Op>(patterns, predicate);
   populateMathF32ExpansionPattern<math::ExpM1Op>(patterns, predicate);
@@ -1734,6 +1834,9 @@ void mlir::populateMathPolynomialApproximationPatterns(
       CosOp, SinAndCosApproximation<false, math::CosOp>>(patterns, predicate);
   populateMathPolynomialApproximationPattern<ErfOp, ErfPolynomialApproximation>(
       patterns, predicate);
+  populateMathPolynomialApproximationPattern<ErfcOp,
+                                             ErfcPolynomialApproximation>(
+      patterns, predicate);
   populateMathPolynomialApproximationPattern<ExpOp, ExpApproximation>(
       patterns, predicate);
   populateMathPolynomialApproximationPattern<ExpM1Op, ExpM1Approximation>(
@@ -1753,16 +1856,17 @@ void mlir::populateMathPolynomialApproximationPatterns(
 }
 
 void mlir::populateMathPolynomialApproximationPatterns(
-    RewritePatternSet &patterns,
+    RewritePatternSet & patterns,
     const MathPolynomialApproximationOptions &options) {
   mlir::populateMathF32ExpansionPatterns(patterns, [](StringRef name) -> bool {
     return llvm::is_contained(
         {math::AtanOp::getOperationName(), math::Atan2Op::getOperationName(),
          math::TanhOp::getOperationName(), math::LogOp::getOperationName(),
          math::Log2Op::getOperationName(), math::Log1pOp::getOperationName(),
-         math::ErfOp::getOperationName(), math::ExpOp::getOperationName(),
-         math::ExpM1Op::getOperationName(), math::CbrtOp::getOperationName(),
-         math::SinOp::getOperationName(), math::CosOp::getOperationName()},
+         math::ErfOp::getOperationName(), math::ErfcOp::getOperationName(),
+         math::ExpOp::getOperationName(), math::ExpM1Op::getOperationName(),
+         math::CbrtOp::getOperationName(), math::SinOp::getOperationName(),
+         math::CosOp::getOperationName()},
         name);
   });
 
@@ -1774,8 +1878,9 @@ void mlir::populateMathPolynomialApproximationPatterns(
              math::TanhOp::getOperationName(), math::LogOp::getOperationName(),
              math::Log2Op::getOperationName(),
              math::Log1pOp::getOperationName(), math::ErfOp::getOperationName(),
-             math::AsinOp::getOperationName(), math::AcosOp::getOperationName(),
-             math::ExpOp::getOperationName(), math::ExpM1Op::getOperationName(),
+             math::ErfcOp::getOperationName(), math::AsinOp::getOperationName(),
+             math::AcosOp::getOperationName(), math::ExpOp::getOperationName(),
+             math::ExpM1Op::getOperationName(),
              math::CbrtOp::getOperationName(), math::SinOp::getOperationName(),
              math::CosOp::getOperationName()},
             name);
diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index 81d071e6bbba3..badc95fa2d4aa 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -81,6 +81,118 @@ func.func @erf_scalar(%arg0: f32) -> f32 {
   return %0 : f32
 }
 
+// CHECK-LABEL: func @erfc_scalar(
+// CHECK-SAME:    %[[val_arg0:.*]]: f32) -> f32 {
+// CHECK-DAG:     %[[c127_i32:.*]] = arith.constant 127 : i32
+// CHECK-DAG:     %[[c23_i32:.*]] = arith.constant 23 : i32
+// CHECK-DAG:     %[[cst:.*]] = arith.constant 1.270000e+02 : f32
+// CHECK-DAG:     %[[cst_0:.*]] = arith.constant -1.270000e+02 : f32
+// CHECK-DAG:     %[[cst_1:.*]] = arith.constant 8.880000e+01 : f32
+// CHECK-DAG:     %[[cst_2:.*]] = arith.constant -8.780000e+01 : f32
+// CHECK-DAG:     %[[cst_3:.*]] = arith.constant 0.166666657 : f32
+// CHECK-DAG:     %[[cst_4:.*]] = arith.constant 0.0416657962 : f32
+// CHECK-DAG:     %[[cst_5:.*]] = arith.constant 0.00833345205 : f32
+// CHECK-DAG:     %[[cst_6:.*]] = arith.constant 0.00139819994 : f32
+// CHECK-DAG:     %[[cst_7:.*]] = arith.constant 1.98756912E-4 : f32
+// CHECK-DAG:     %[[cst_8:.*]] = arith.constant 2.12194442E-4 : f32
+// CHECK-DAG:     %[[cst_9:.*]] = arith.constant -0.693359375 : f32
+// CHECK-DAG:     %[[cst_10:.*]] = arith.constant 1.44269502 : f32
+// CHECK-DAG:     %[[cst_11:.*]] = arith.constant 0.276978403 : f32
+// CHECK-DAG:     %[[cst_12:.*]] = arith.constant -0.0927639827 : f32
+// CHECK-DAG:     %[[cst_13:.*]] = arith.constant -0.166031361 : f32
+// CHECK-DAG:     %[[cst_14:.*]] = arith.constant 0.164055392 : f32
+// CHECK-DAG:     %[[cst_15:.*]] = arith.constant -0.0542046614 : f32
+// CHECK-DAG:     %[[cst_16:.*]] = arith.constant -8.059920e-03 : f32
+// CHECK-DAG:     %[[cst_17:.*]] = arith.constant 0.00863227434 : f32
+// CHECK-DAG:     %[[cst_18:.*]] = arith.constant 0.00131355342 : f32
+// CHECK-DAG:     %[[cst_19:.*]] = arith.constant -0.0012307521 : f32
+// CHECK-DAG:     %[[cst_20:.*]] = arith.constant -4.01139259E-4 : f32
+// CHECK-DAG:     %[[cst_true:.*]] = arith.constant true
+// CHECK-DAG:     %[[cst_21:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG:     %[[cst_22:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK-DAG:     %[[cst_23:.*]] = arith.constant 5.000000e-01 : f32
+// CHECK-DAG:     %[[cst_24:.*]] = arith.constant -4.000000e+00 : f32
+// CHECK-DAG:     %[[cst_25:.*]] = arith.constant -2.000000e+00 : f32
+// CHECK-DAG:     %[[cst_26:.*]] = arith.constant 2.000000e+00 : f32
+// CHECK-DAG:     %[[cst_27:.*]] = arith.constant 0x7F800000 : f32
+// CHECK-DAG:     %[[cst_28:.*]] = arith.constant 10.0546875 : f32
+// CHECK:         %[[val_0:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
+// CHECK:         %[[val_1:.*]] = arith.negf %[[val_arg0]] : f32
+// CHECK:         %[[val_2:.*]] = arith.select %[[val_0]], %[[val_1]], %[[val_arg0]] : f32
+// CHECK:         %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
+// CHECK:         %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
+// CHECK:         %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
+// CHECK:         %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
+// CHECK:         %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
+// CHECK:         %[[val_8:.*]] = arith.negf %[[val_2]] : f32
+// CHECK:         %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
+// CHECK:         %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
+// CHECK:         %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
+// CHECK:         %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
+// CHECK:         %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
+// CHECK:         %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
+// CHECK:         %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
+// CHECK:         %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
+// CHECK:         %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
+// CHECK:         %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
+// CHECK:         %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
+// CHECK:         %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
+// CHECK:         %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
+// CHECK:         %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
+// CHECK:         %[[val_23:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
+// CHECK:         %[[val_24:.*]] = arith.negf %[[val_2]] : f32
+// CHECK:         %[[val_25:.*]] = math.fma %[[val_22]], %[[val_24]], %[[cst_23]] : f32
+// CHECK:         %[[val_26:.*]] = math.fma %[[val_25]], %[[cst_26]], %[[val_23]] : f32
+// CHECK:         %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
+// CHECK:         %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
+// CHECK:         %[[val_29:.*]] = arith.negf %[[val_28]] : f32
+// CHECK:         %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
+// CHECK:         %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
+// CHECK:         %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
+// CHECK:         %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
+// CHECK:         %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
+// CHECK:         %[[val_35:.*]] = math.floor %[[val_34]] : f32
+// CHECK:         %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
+// CHECK:         %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
+// CHECK:         %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
+// CHECK:         %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
+// CHECK:         %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
+// CHECK:         %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
+// CHECK:         %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
+// CHECK:         %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
+// CHECK:         %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
+// CHECK:         %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
+// CHECK:         %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
+// CHECK:         %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
+// CHECK:         %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
+// CHECK:         %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
+// CHECK:         %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
+// CHECK:         %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
+// CHECK:         %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
+// CHECK:         %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
+// CHECK:         %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
+// CHECK:         %[[val_55:.*]] = arith.negf %[[val_2]] : f32
+// CHECK:         %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
+// CHECK:         %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
+// CHECK:         %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
+// CHECK:         %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
+// CHECK:         %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
+// CHECK:         %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
+// CHECK:         %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
+// CHECK:         %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
+// CHECK:         %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
+// CHECK:         %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
+// CHECK:         %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
+// CHECK:         %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
+// CHECK:         %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
+// CHECK:         return %[[val_68]] : f32
+// CHECK:       }
+
+func.func @erfc_scalar(%arg0: f32) -> f32 {
+  %0 = math.erfc %arg0 : f32
+  return %0 : f32
+}
+
 // CHECK-LABEL:   func @erf_vector(
 // CHECK-SAME:                     %[[arg0:.*]]: vector<8xf32>) -> vector<8xf32> {
 // CHECK:           %[[zero:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32>
diff --git a/mlir/test/mlir-runner/math-polynomial-approx.mlir b/mlir/test/mlir-runner/math-polynomial-approx.mlir
index 148ef25cead62..6ed03916f1e15 100644
--- a/mlir/test/mlir-runner/math-polynomial-approx.mlir
+++ b/mlir/test/mlir-runner/math-polynomial-approx.mlir
@@ -273,6 +273,77 @@ func.func @erf() {
   return
 }
 
+// -------------------------------------------------------------------------- //
+// Erfc.
+// -------------------------------------------------------------------------- //
+func.func @erfc_f32(%a : f32) {
+  %r = math.erfc %a : f32
+  vector.print %r : f32
+  return
+}
+
+func.func @erfc_4xf32(%a : vector<4xf32>) {
+  %r = math.erfc %a : vector<4xf32>
+  vector.print %r : vector<4xf32>
+  return
+}
+
+func.func @erfc() {
+  // CHECK: 1.00027
+  %val1 = arith.constant -2.431864e-4 : f32
+  call @erfc_f32(%val1) : (f32) -> ()
+
+  // CHECK: 0.257905
+  %val2 = arith.constant 0.79999 : f32
+  call @erfc_f32(%val2) : (f32) -> ()
+
+  // CHECK: 0.257899
+  %val3 = arith.constant 0.8 : f32
+  call @erfc_f32(%val3) : (f32) -> ()
+
+  // CHECK: 0.00467794
+  %val4 = arith.constant 1.99999 : f32
+  call @erfc_f32(%val4) : (f32) -> ()
+
+  // CHECK: 0.00467774
+  %val5 = arith.constant 2.0 : f32
+  call @erfc_f32(%val5) : (f32) -> ()
+
+  // CHECK: 1.13736e-07
+  %val6 = arith.constant 3.74999 : f32
+  call @erfc_f32(%val6) : (f32) -> ()
+
+  // CHECK: 1.13727e-07
+  %val7 = arith.constant 3.75 : f32
+  call @erfc_f32(%val7) : (f32) -> ()
+
+  // CHECK: 2
+  %negativeInf = arith.constant 0xff800000 : f32
+  call @erfc_f32(%negativeInf) : (f32) -> ()
+
+  // CHECK: 2, 2, 1.91376, 1.73145
+  %vecVals1 = arith.constant dense<[-3.4028235e+38, -4.54318, -1.2130899, -7.8234202e-01]> : vector<4xf32>
+  call @erfc_4xf32(%vecVals1) : (vector<4xf32>) -> ()
+
+  // CHECK: 1, 1, 1, 0.878681
+  %vecVals2 = arith.constant dense<[-1.1754944e-38, 0.0, 1.1754944e-38, 1.0793410e-01]> : vector<4xf32>
+  call @erfc_4xf32(%vecVals2) : (vector<4xf32>) -> ()
+
+  // CHECK: 0.0805235, 0.000931045, 6.40418e-08, 0
+  %vecVals3 = arith.constant dense<[1.23578, 2.34093, 3.82342, 3.4028235e+38]> : vector<4xf32>
+  call @erfc_4xf32(%vecVals3) : (vector<4xf32>) -> ()
+
+  // CHECK: 0
+  %inf = arith.constant 0x7f800000 : f32
+  call @erfc_f32(%inf) : (f32) -> ()
+
+  // CHECK: nan
+  %nan = arith.constant 0x7fc00000 : f32
+  call @erfc_f32(%nan) : (f32) -> ()
+
+  return
+}
+
 // -------------------------------------------------------------------------- //
 // Exp.
 // -------------------------------------------------------------------------- //
@@ -772,6 +843,7 @@ func.func @main() {
   call @log2(): () -> ()
   call @log1p(): () -> ()
   call @erf(): () -> ()
+  call @erfc(): () -> ()
   call @exp(): () -> ()
   call @expm1(): () -> ()
   call @sin(): () -> ()
diff --git a/mlir/utils/vim/syntax/mlir.vim b/mlir/utils/vim/syntax/mlir.vim
index 7989032eada88..070d81658ca3d 100644
--- a/mlir/utils/vim/syntax/mlir.vim
+++ b/mlir/utils/vim/syntax/mlir.vim
@@ -44,6 +44,7 @@ syn keyword mlirOps view
 
 " Math ops.
 syn match mlirOps /\<math\.erf\>/
+syn match mlirOps /\<math\.erfc\>/
 
 " Affine ops.
 syn match mlirOps /\<affine\.apply\>/

>From 7cb9ca2cf87d1bf9aa1834bea5a7ff4c7614564f Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sun, 9 Feb 2025 21:11:24 -0500
Subject: [PATCH 2/9] Use CHECK-NEXT to see where mismatch happens on Linux.

---
 .../Math/polynomial-approximation.mlir        | 140 +++++++++---------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index badc95fa2d4aa..ca37fdf1edc45 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -117,76 +117,76 @@ func.func @erf_scalar(%arg0: f32) -> f32 {
 // CHECK-DAG:     %[[cst_27:.*]] = arith.constant 0x7F800000 : f32
 // CHECK-DAG:     %[[cst_28:.*]] = arith.constant 10.0546875 : f32
 // CHECK:         %[[val_0:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
-// CHECK:         %[[val_1:.*]] = arith.negf %[[val_arg0]] : f32
-// CHECK:         %[[val_2:.*]] = arith.select %[[val_0]], %[[val_1]], %[[val_arg0]] : f32
-// CHECK:         %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
-// CHECK:         %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
-// CHECK:         %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
-// CHECK:         %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
-// CHECK:         %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
-// CHECK:         %[[val_8:.*]] = arith.negf %[[val_2]] : f32
-// CHECK:         %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
-// CHECK:         %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
-// CHECK:         %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
-// CHECK:         %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
-// CHECK:         %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
-// CHECK:         %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
-// CHECK:         %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
-// CHECK:         %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
-// CHECK:         %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
-// CHECK:         %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
-// CHECK:         %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
-// CHECK:         %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
-// CHECK:         %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
-// CHECK:         %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
-// CHECK:         %[[val_23:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
-// CHECK:         %[[val_24:.*]] = arith.negf %[[val_2]] : f32
-// CHECK:         %[[val_25:.*]] = math.fma %[[val_22]], %[[val_24]], %[[cst_23]] : f32
-// CHECK:         %[[val_26:.*]] = math.fma %[[val_25]], %[[cst_26]], %[[val_23]] : f32
-// CHECK:         %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
-// CHECK:         %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
-// CHECK:         %[[val_29:.*]] = arith.negf %[[val_28]] : f32
-// CHECK:         %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
-// CHECK:         %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
-// CHECK:         %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
-// CHECK:         %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
-// CHECK:         %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
-// CHECK:         %[[val_35:.*]] = math.floor %[[val_34]] : f32
-// CHECK:         %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
-// CHECK:         %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
-// CHECK:         %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
-// CHECK:         %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
-// CHECK:         %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
-// CHECK:         %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
-// CHECK:         %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
-// CHECK:         %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
-// CHECK:         %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
-// CHECK:         %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
-// CHECK:         %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
-// CHECK:         %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
-// CHECK:         %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
-// CHECK:         %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
-// CHECK:         %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
-// CHECK:         %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
-// CHECK:         %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
-// CHECK:         %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
-// CHECK:         %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
-// CHECK:         %[[val_55:.*]] = arith.negf %[[val_2]] : f32
-// CHECK:         %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
-// CHECK:         %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
-// CHECK:         %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
-// CHECK:         %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
-// CHECK:         %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
-// CHECK:         %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
-// CHECK:         %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
-// CHECK:         %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
-// CHECK:         %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
-// CHECK:         %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
-// CHECK:         %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
-// CHECK:         %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
-// CHECK:         %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
-// CHECK:         return %[[val_68]] : f32
-// CHECK:       }
+// CHECK-NEXT:    %[[val_1:.*]] = arith.negf %[[val_arg0]] : f32
+// CHECK-NEXT:         %[[val_2:.*]] = arith.select %[[val_0]], %[[val_1]], %[[val_arg0]] : f32
+// CHECK-NEXT:         %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
+// CHECK-NEXT:         %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
+// CHECK-NEXT:         %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
+// CHECK-NEXT:         %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
+// CHECK-NEXT:         %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_8:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
+// CHECK-NEXT:         %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
+// CHECK-NEXT:         %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
+// CHECK-NEXT:         %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
+// CHECK-NEXT:         %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
+// CHECK-NEXT:         %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
+// CHECK-NEXT:         %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
+// CHECK-NEXT:         %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
+// CHECK-NEXT:         %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
+// CHECK-NEXT:         %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
+// CHECK-NEXT:         %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
+// CHECK-NEXT:         %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
+// CHECK-NEXT:         %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
+// CHECK-NEXT:         %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
+// CHECK-NEXT:         %[[val_23:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
+// CHECK-NEXT:         %[[val_24:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_25:.*]] = math.fma %[[val_22]], %[[val_24]], %[[cst_23]] : f32
+// CHECK-NEXT:         %[[val_26:.*]] = math.fma %[[val_25]], %[[cst_26]], %[[val_23]] : f32
+// CHECK-NEXT:         %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
+// CHECK-NEXT:         %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_29:.*]] = arith.negf %[[val_28]] : f32
+// CHECK-NEXT:         %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
+// CHECK-NEXT:         %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
+// CHECK-NEXT:         %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
+// CHECK-NEXT:         %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
+// CHECK-NEXT:         %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
+// CHECK-NEXT:         %[[val_35:.*]] = math.floor %[[val_34]] : f32
+// CHECK-NEXT:         %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
+// CHECK-NEXT:         %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
+// CHECK-NEXT:         %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
+// CHECK-NEXT:         %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
+// CHECK-NEXT:         %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
+// CHECK-NEXT:         %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
+// CHECK-NEXT:         %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
+// CHECK-NEXT:         %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
+// CHECK-NEXT:         %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
+// CHECK-NEXT:         %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
+// CHECK-NEXT:         %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
+// CHECK-NEXT:         %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
+// CHECK-NEXT:         %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
+// CHECK-NEXT:         %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
+// CHECK-NEXT:         %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
+// CHECK-NEXT:         %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
+// CHECK-NEXT:         %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
+// CHECK-NEXT:         %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
+// CHECK-NEXT:         %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
+// CHECK-NEXT:         %[[val_55:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
+// CHECK-NEXT:         %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
+// CHECK-NEXT:         %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
+// CHECK-NEXT:         %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
+// CHECK-NEXT:         %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
+// CHECK-NEXT:         %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
+// CHECK-NEXT:         %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
+// CHECK-NEXT:         %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
+// CHECK-NEXT:         %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
+// CHECK-NEXT:         %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
+// CHECK-NEXT:         %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
+// CHECK-NEXT:         %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
+// CHECK-NEXT:         %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
+// CHECK-NEXT:         return %[[val_68]] : f32
+// CHECK-NEXT:       }
 
 func.func @erfc_scalar(%arg0: f32) -> f32 {
   %0 = math.erfc %arg0 : f32

>From 92f65439043026e17b46ea26a5cd39b9823e0f1b Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 10 Feb 2025 07:23:17 -0500
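Subject: [PATCH 3/9] Make the order of op creation explicit to make sure the
 same sequence is created on different Linux systems.

C++ does not specify the order in which function arguments are evaluated, so
the nested builder.create calls could be emitted in a different order depending
on the compiler. Creating each op in its own statement fixes the order.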

---
 .../Dialect/Math/Transforms/PolynomialApproximation.cpp   | 7 ++++---
 mlir/test/Dialect/Math/polynomial-approximation.mlir      | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 29e7f3d61caa2..aa35599cca16e 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1184,9 +1184,10 @@ ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
   Value d = builder.create<math::FmaOp>(pos2, a, one);
   r = builder.create<arith::DivFOp>(one, d);
   q = builder.create<math::FmaOp>(p, r, r);
-  e = builder.create<math::FmaOp>(
-      builder.create<math::FmaOp>(q, builder.create<arith::NegFOp>(a), onehalf),
-      pos2, builder.create<arith::SubFOp>(p, q));
+  Value negfa = builder.create<arith::NegFOp>(a);
+  Value fmaqah = builder.create<math::FmaOp>(q, negfa, onehalf);
+  Value psubq = builder.create<arith::SubFOp>(p, q);
+  e = builder.create<math::FmaOp>(fmaqah, pos2, psubq);
   r = builder.create<math::FmaOp>(e, r, q);
 
   Value s = builder.create<arith::MulFOp>(a, a);
diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index ca37fdf1edc45..0a9bced067a65 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -139,10 +139,10 @@ func.func @erf_scalar(%arg0: f32) -> f32 {
 // CHECK-NEXT:         %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
 // CHECK-NEXT:         %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
 // CHECK-NEXT:         %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
-// CHECK-NEXT:         %[[val_23:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
-// CHECK-NEXT:         %[[val_24:.*]] = arith.negf %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_25:.*]] = math.fma %[[val_22]], %[[val_24]], %[[cst_23]] : f32
-// CHECK-NEXT:         %[[val_26:.*]] = math.fma %[[val_25]], %[[cst_26]], %[[val_23]] : f32
+// CHECK-NEXT:         %[[val_23:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:         %[[val_24:.*]] = math.fma %[[val_22]], %[[val_23]], %[[cst_23]] : f32
+// CHECK-NEXT:         %[[val_25:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
+// CHECK-NEXT:         %[[val_26:.*]] = math.fma %[[val_24]], %[[cst_26]], %[[val_25]] : f32
 // CHECK-NEXT:         %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
 // CHECK-NEXT:         %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
 // CHECK-NEXT:         %[[val_29:.*]] = arith.negf %[[val_28]] : f32

>From d31b6619ad4c432f70134e16aac9166563fe1d42 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 10 Feb 2025 17:41:20 -0500
Subject: [PATCH 4/9] Add information about approximation.

---
 mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index aa35599cca16e..af14cb0e33b1d 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1122,7 +1122,9 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
   return success();
 }
 
-// Approximates erfc(x) with
+// Approximates erfc(x) with p((x - 2) / (x + 2)), where p is a 9 degree
+// polynomial. This approximation is based on the following stackoverflow post:
+// https://stackoverflow.com/questions/35966695/vectorizable-implementation-of-complementary-error-function-erfcf
 LogicalResult
 ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
                                              PatternRewriter &rewriter) const {

>From 0a4ac2e4499d101157a77f90d891172d89fe960e Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 11 Feb 2025 09:50:30 -0500
Subject: [PATCH 5/9] Fix typo

---
 mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index af14cb0e33b1d..0dde66f99dfc2 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1767,7 +1767,7 @@ void mlir::populatePolynomialApproximateErfPattern(
 void mlir::populatePolynomialApproximateErfcPattern(
     RewritePatternSet &patterns) {
   patterns.add<ErfcPolynomialApproximation>(patterns.getContext());
-
+}
 
 template <typename OpType>
 static void
@@ -1881,7 +1881,7 @@ void mlir::populateMathPolynomialApproximationPatterns(
              math::TanhOp::getOperationName(), math::LogOp::getOperationName(),
              math::Log2Op::getOperationName(),
              math::Log1pOp::getOperationName(), math::ErfOp::getOperationName(),
-             math::ErcfOp::getOperationName(), math::AsinOp::getOperationName(),
+             math::ErfcOp::getOperationName(), math::AsinOp::getOperationName(),
              math::AcosOp::getOperationName(), math::ExpOp::getOperationName(),
              math::ExpM1Op::getOperationName(),
              math::CbrtOp::getOperationName(), math::SinOp::getOperationName(),

>From 2114fd04211d9899d8df82abed82155c3834aa0c Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 11 Feb 2025 09:53:12 -0500
Subject: [PATCH 6/9] Undo useless edits.

---
 mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 0dde66f99dfc2..38e65c6fc0c22 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1220,7 +1220,9 @@ ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
 //----------------------------------------------------------------------------//
 // Exp approximation.
 //----------------------------------------------------------------------------//
+
 namespace {
+
 Value clampWithNormals(ImplicitLocOpBuilder &builder,
                        const std::optional<VectorShape> shape, Value value,
                        float lowerBound, float upperBound) {
@@ -1859,7 +1861,7 @@ void mlir::populateMathPolynomialApproximationPatterns(
 }
 
 void mlir::populateMathPolynomialApproximationPatterns(
-    RewritePatternSet & patterns,
+    RewritePatternSet &patterns,
     const MathPolynomialApproximationOptions &options) {
   mlir::populateMathF32ExpansionPatterns(patterns, [](StringRef name) -> bool {
     return llvm::is_contained(

>From afafdcc3d3a7f518429bc2e85ad03d8f229bd353 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 14 Feb 2025 09:15:58 -0500
Subject: [PATCH 7/9] Address review comments

---
 mlir/include/mlir/Dialect/Math/IR/MathOps.td  |   8 +-
 .../Transforms/PolynomialApproximation.cpp    |  12 +-
 .../Math/polynomial-approximation.mlir        | 140 +++++++++---------
 3 files changed, 80 insertions(+), 80 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
index 87b68540c85dd..75d7a316526e2 100644
--- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td
+++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
@@ -567,10 +567,10 @@ def Math_ErfOp : Math_FloatUnaryOp<"erf"> {
 def Math_ErfcOp : Math_FloatUnaryOp<"erfc"> {
   let summary = "complementary error function of the specified value";
   let description = [{
-    The `erfc` operation computes the complementary error function.
-    It takes one operand of floating point type (i.e., scalar, tensor or
-    vector) and returns one result of the same type.
-    It has no standard attributes.
+    The `erfc` operation computes the complementary error function, defined
+    as 1-erf(x). It takes one operand of floating point type (i.e., scalar,
+    tensor or vector) and returns one result of the same type. It has no
+    standard attributes.
 
     Example:
 
diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 38e65c6fc0c22..342e27097894a 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1125,6 +1125,12 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
 // Approximates erfc(x) with p((x - 2) / (x + 2)), where p is a 9 degree
 // polynomial. This approximation is based on the following stackoverflow post:
 // https://stackoverflow.com/questions/35966695/vectorizable-implementation-of-complementary-error-function-erfcf
+// The stackoverflow post is in turn based on:
+// M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of
+// (1+2x)exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. 36,
+// No. 153, January 1981, pp. 249-253.
+//
+// Maximum error: 2.65 ulps
 LogicalResult
 ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
                                              PatternRewriter &rewriter) const {
@@ -1150,11 +1156,7 @@ ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
   Value posInf = bcast(f32FromBits(builder, 0x7f800000u));
   Value clampVal = bcast(floatCst(builder, 10.0546875f, et));
 
-  // Get abs(x)
-  Value isNegativeArg =
-      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, x, zero);
-  Value negArg = builder.create<arith::NegFOp>(x);
-  Value a = builder.create<arith::SelectOp>(isNegativeArg, negArg, x);
+  Value a = builder.create<math::AbsFOp>(x);
   Value p = builder.create<arith::AddFOp>(a, pos2);
   Value r = builder.create<arith::DivFOp>(one, p);
   Value q = builder.create<math::FmaOp>(neg4, r, one);
diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index 0a9bced067a65..bf7c4134af12e 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -116,77 +116,75 @@ func.func @erf_scalar(%arg0: f32) -> f32 {
 // CHECK-DAG:     %[[cst_26:.*]] = arith.constant 2.000000e+00 : f32
 // CHECK-DAG:     %[[cst_27:.*]] = arith.constant 0x7F800000 : f32
 // CHECK-DAG:     %[[cst_28:.*]] = arith.constant 10.0546875 : f32
-// CHECK:         %[[val_0:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
-// CHECK-NEXT:    %[[val_1:.*]] = arith.negf %[[val_arg0]] : f32
-// CHECK-NEXT:         %[[val_2:.*]] = arith.select %[[val_0]], %[[val_1]], %[[val_arg0]] : f32
-// CHECK-NEXT:         %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
-// CHECK-NEXT:         %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
-// CHECK-NEXT:         %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
-// CHECK-NEXT:         %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
-// CHECK-NEXT:         %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_8:.*]] = arith.negf %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
-// CHECK-NEXT:         %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
-// CHECK-NEXT:         %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
-// CHECK-NEXT:         %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
-// CHECK-NEXT:         %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
-// CHECK-NEXT:         %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
-// CHECK-NEXT:         %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
-// CHECK-NEXT:         %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
-// CHECK-NEXT:         %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
-// CHECK-NEXT:         %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
-// CHECK-NEXT:         %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
-// CHECK-NEXT:         %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
-// CHECK-NEXT:         %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
-// CHECK-NEXT:         %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
-// CHECK-NEXT:         %[[val_23:.*]] = arith.negf %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_24:.*]] = math.fma %[[val_22]], %[[val_23]], %[[cst_23]] : f32
-// CHECK-NEXT:         %[[val_25:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
-// CHECK-NEXT:         %[[val_26:.*]] = math.fma %[[val_24]], %[[cst_26]], %[[val_25]] : f32
-// CHECK-NEXT:         %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
-// CHECK-NEXT:         %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_29:.*]] = arith.negf %[[val_28]] : f32
-// CHECK-NEXT:         %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
-// CHECK-NEXT:         %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
-// CHECK-NEXT:         %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
-// CHECK-NEXT:         %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
-// CHECK-NEXT:         %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
-// CHECK-NEXT:         %[[val_35:.*]] = math.floor %[[val_34]] : f32
-// CHECK-NEXT:         %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
-// CHECK-NEXT:         %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
-// CHECK-NEXT:         %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
-// CHECK-NEXT:         %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
-// CHECK-NEXT:         %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
-// CHECK-NEXT:         %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
-// CHECK-NEXT:         %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
-// CHECK-NEXT:         %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
-// CHECK-NEXT:         %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
-// CHECK-NEXT:         %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
-// CHECK-NEXT:         %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
-// CHECK-NEXT:         %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
-// CHECK-NEXT:         %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
-// CHECK-NEXT:         %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
-// CHECK-NEXT:         %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
-// CHECK-NEXT:         %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
-// CHECK-NEXT:         %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
-// CHECK-NEXT:         %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
-// CHECK-NEXT:         %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
-// CHECK-NEXT:         %[[val_55:.*]] = arith.negf %[[val_2]] : f32
-// CHECK-NEXT:         %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
-// CHECK-NEXT:         %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
-// CHECK-NEXT:         %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
-// CHECK-NEXT:         %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
-// CHECK-NEXT:         %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
-// CHECK-NEXT:         %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
-// CHECK-NEXT:         %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
-// CHECK-NEXT:         %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
-// CHECK-NEXT:         %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
-// CHECK-NEXT:         %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
-// CHECK-NEXT:         %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
-// CHECK-NEXT:         %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
-// CHECK-NEXT:         %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
-// CHECK-NEXT:         return %[[val_68]] : f32
-// CHECK-NEXT:       }
+// CHECK:         %[[val_2:.*]] = math.absf %[[val_arg0]] : f32
+// CHECK-NEXT:    %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
+// CHECK-NEXT:    %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
+// CHECK-NEXT:    %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
+// CHECK-NEXT:    %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
+// CHECK-NEXT:    %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
+// CHECK-NEXT:    %[[val_8:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:    %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
+// CHECK-NEXT:    %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
+// CHECK-NEXT:    %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
+// CHECK-NEXT:    %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
+// CHECK-NEXT:    %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
+// CHECK-NEXT:    %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
+// CHECK-NEXT:    %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
+// CHECK-NEXT:    %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
+// CHECK-NEXT:    %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
+// CHECK-NEXT:    %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
+// CHECK-NEXT:    %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
+// CHECK-NEXT:    %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
+// CHECK-NEXT:    %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
+// CHECK-NEXT:    %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
+// CHECK-NEXT:    %[[val_23:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:    %[[val_24:.*]] = math.fma %[[val_22]], %[[val_23]], %[[cst_23]] : f32
+// CHECK-NEXT:    %[[val_25:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
+// CHECK-NEXT:    %[[val_26:.*]] = math.fma %[[val_24]], %[[cst_26]], %[[val_25]] : f32
+// CHECK-NEXT:    %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
+// CHECK-NEXT:    %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
+// CHECK-NEXT:    %[[val_29:.*]] = arith.negf %[[val_28]] : f32
+// CHECK-NEXT:    %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
+// CHECK-NEXT:    %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
+// CHECK-NEXT:    %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
+// CHECK-NEXT:    %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
+// CHECK-NEXT:    %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
+// CHECK-NEXT:    %[[val_35:.*]] = math.floor %[[val_34]] : f32
+// CHECK-NEXT:    %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
+// CHECK-NEXT:    %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
+// CHECK-NEXT:    %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
+// CHECK-NEXT:    %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
+// CHECK-NEXT:    %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
+// CHECK-NEXT:    %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
+// CHECK-NEXT:    %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
+// CHECK-NEXT:    %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
+// CHECK-NEXT:    %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
+// CHECK-NEXT:    %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
+// CHECK-NEXT:    %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
+// CHECK-NEXT:    %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
+// CHECK-NEXT:    %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
+// CHECK-NEXT:    %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
+// CHECK-NEXT:    %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
+// CHECK-NEXT:    %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
+// CHECK-NEXT:    %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
+// CHECK-NEXT:    %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
+// CHECK-NEXT:    %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
+// CHECK-NEXT:    %[[val_55:.*]] = arith.negf %[[val_2]] : f32
+// CHECK-NEXT:    %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
+// CHECK-NEXT:    %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
+// CHECK-NEXT:    %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
+// CHECK-NEXT:    %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
+// CHECK-NEXT:    %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
+// CHECK-NEXT:    %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
+// CHECK-NEXT:    %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
+// CHECK-NEXT:    %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
+// CHECK-NEXT:    %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
+// CHECK-NEXT:    %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
+// CHECK-NEXT:    %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
+// CHECK-NEXT:    %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
+// CHECK-NEXT:    %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
+// CHECK-NEXT:    return %[[val_68]] : f32
+// CHECK-NEXT: }
 
 func.func @erfc_scalar(%arg0: f32) -> f32 {
   %0 = math.erfc %arg0 : f32

>From c9a236a78755de3280626308af857dc65a0b5443 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 14 Feb 2025 11:26:31 -0500
Subject: [PATCH 8/9] Address review comments v2

---
 mlir/lib/Dialect/Math/IR/MathOps.cpp                        | 6 +++---
 .../lib/Dialect/Math/Transforms/PolynomialApproximation.cpp | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Math/IR/MathOps.cpp b/mlir/lib/Dialect/Math/IR/MathOps.cpp
index 40e60e80abfbf..9c4d88e2191ce 100644
--- a/mlir/lib/Dialect/Math/IR/MathOps.cpp
+++ b/mlir/lib/Dialect/Math/IR/MathOps.cpp
@@ -339,10 +339,10 @@ OpFoldResult math::ErfOp::fold(FoldAdaptor adaptor) {
 OpFoldResult math::ErfcOp::fold(FoldAdaptor adaptor) {
   return constFoldUnaryOpConditional<FloatAttr>(
       adaptor.getOperands(), [](const APFloat &a) -> std::optional<APFloat> {
-        switch (a.getSizeInBits(a.getSemantics())) {
-        case 64:
+        switch (APFloat::SemanticsToEnum(a.getSemantics())) {
+        case APFloat::Semantics::S_IEEEdouble:
           return APFloat(erfc(a.convertToDouble()));
-        case 32:
+        case APFloat::Semantics::S_IEEEsingle:
           return APFloat(erfcf(a.convertToFloat()));
         default:
           return {};
diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 342e27097894a..167eebd786dba 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -1153,7 +1153,7 @@ ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
   Value neg4 = bcast(floatCst(builder, -4.0f, et));
   Value neg2 = bcast(floatCst(builder, -2.0f, et));
   Value pos2 = bcast(floatCst(builder, 2.0f, et));
-  Value posInf = bcast(f32FromBits(builder, 0x7f800000u));
+  Value posInf = bcast(floatCst(builder, INFINITY, et));
   Value clampVal = bcast(floatCst(builder, 10.0546875f, et));
 
   Value a = builder.create<math::AbsFOp>(x);

>From 4a0b672f354020d70dbfd432b5957f73188ece7a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 14 Feb 2025 11:33:28 -0500
Subject: [PATCH 9/9] Improve description

---
 mlir/include/mlir/Dialect/Math/IR/MathOps.td | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
index 75d7a316526e2..16ce4e2366c76 100644
--- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td
+++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
@@ -567,8 +567,11 @@ def Math_ErfOp : Math_FloatUnaryOp<"erf"> {
 def Math_ErfcOp : Math_FloatUnaryOp<"erfc"> {
   let summary = "complementary error function of the specified value";
   let description = [{
-    The `erfc` operation computes the complementary error function, defined
-    as 1-erf(x). It takes one operand of floating point type (i.e., scalar,
+
+    The `erfc` operation computes the complementary error function, defined as
+    1-erf(x). This function is part of libm and is needed for accuracy, since
+    simply calculating 1-erf(x) when erf(x) is close to 1 (i.e., for large x)
+    is inaccurate. It takes one operand of floating point type (i.e., scalar,
     tensor or vector) and returns one result of the same type. It has no
     standard attributes.