[Mlir-commits] [mlir] 1c0374e - [mlir] Add polynomial approximation for math::Log1p

Mon May 3 15:11:44 PDT 2021

Author: Emilio Cota
Date: 2021-05-03T15:11:37-07:00
New Revision: 1c0374e770956037764f9cae8a5e35ec7cefcdc5

URL: https://github.com/llvm/llvm-project/commit/1c0374e770956037764f9cae8a5e35ec7cefcdc5
DIFF: https://github.com/llvm/llvm-project/commit/1c0374e770956037764f9cae8a5e35ec7cefcdc5.diff

LOG: [mlir] Add polynomial approximation for math::Log1p

This approximation matches the one in Eigen.

```
name                      old cpu/op  new cpu/op  delta
BM_mlir_Log1p_f32/10      83.2ns ± 7%  34.8ns ± 5%  -58.19%    (p=0.000 n=84+71)
BM_mlir_Log1p_f32/100      664ns ± 4%   129ns ± 4%  -80.57%    (p=0.000 n=82+82)
BM_mlir_Log1p_f32/1k      6.75µs ± 4%  0.81µs ± 3%  -88.07%    (p=0.000 n=88+79)
BM_mlir_Log1p_f32/10k     76.5µs ± 3%   7.8µs ± 4%  -89.84%    (p=0.000 n=80+80)
BM_eigen_s_Log1p_f32/10   70.1ns ±14%  72.6ns ±14%   +3.49%  (p=0.000 n=116+112)
BM_eigen_s_Log1p_f32/100   706ns ± 9%   717ns ± 3%   +1.60%   (p=0.018 n=117+80)
BM_eigen_s_Log1p_f32/1k   8.26µs ± 1%  8.26µs ± 1%     ~       (p=0.567 n=84+86)
BM_eigen_s_Log1p_f32/10k  92.1µs ± 5%  92.6µs ± 6%   +0.60%  (p=0.047 n=115+115)
BM_eigen_v_Log1p_f32/10   31.8ns ±24%  34.9ns ±17%   +9.72%    (p=0.000 n=98+96)
BM_eigen_v_Log1p_f32/100   169ns ±10%   177ns ± 5%   +4.66%   (p=0.000 n=119+81)
BM_eigen_v_Log1p_f32/1k   1.42µs ± 4%  1.46µs ± 8%   +2.70%   (p=0.000 n=93+113)
BM_eigen_v_Log1p_f32/10k  14.4µs ± 5%  14.9µs ± 8%   +3.61%  (p=0.000 n=115+110)
```

Reviewed By: ezhulenev, ftynse

Differential Revision: https://reviews.llvm.org/D101765

Added: 
    

Modified: 
    mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
    mlir/test/Dialect/Math/polynomial-approximation.mlir
    mlir/test/mlir-cpu-runner/math_polynomial_approx.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index d9c78f527c8a..79856b9596dc 100644

--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -413,6 +413,53 @@ struct Log2Approximation : public LogApproximationBase<math::Log2Op> {
 };
 } // namespace
 
+//----------------------------------------------------------------------------//
+// Log1p approximation: rewrite pattern that expands math::Log1pOp.
+//----------------------------------------------------------------------------//
+
+namespace {
+struct Log1pApproximation : public OpRewritePattern<math::Log1pOp> {
+public:
+  using OpRewritePattern::OpRewritePattern; // Inherit base constructors.
+
+  LogicalResult matchAndRewrite(math::Log1pOp op,
+                                PatternRewriter &rewriter) const final;
+};
+} // namespace
+
+// Approximate log(1+x). Fails to match on unsupported operand types.
+LogicalResult
+Log1pApproximation::matchAndRewrite(math::Log1pOp op,
+                                    PatternRewriter &rewriter) const {
+  auto width = vectorWidth(op.operand().getType(), isF32);
+  if (!width.hasValue())
+    return rewriter.notifyMatchFailure(op, "unsupported operand type");
+
+  ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
+  auto bcast = [&](Value value) -> Value {
+    return broadcast(builder, value, *width);
+  };
+
+  // Approximate log(1+x) using the following, due to W. Kahan:
+  //   u = x + 1.0;
+  //   if (u == 1.0 || u == inf) return x;  // "uSmall" || "uInf" below.
+  //   return x * log(u) / (u - 1.0);
+  //          ^^^^^^^^^^^^^^^^^^^^^^
+  //             "logLarge" below; "uInf" is tested as u == log(u).
+  Value cstOne = bcast(f32Cst(builder, 1.0f));
+  Value x = op.operand();
+  Value u = builder.create<AddFOp>(x, cstOne);
+  Value uSmall = builder.create<CmpFOp>(CmpFPredicate::OEQ, u, cstOne);
+  Value logU = builder.create<math::LogOp>(u);
+  Value uInf = builder.create<CmpFOp>(CmpFPredicate::OEQ, u, logU);
+  Value logLarge = builder.create<MulFOp>(
+      x, builder.create<DivFOp>(logU, builder.create<SubFOp>(u, cstOne)));
+  Value approximation = builder.create<SelectOp>(
+      builder.create<LLVM::OrOp>(uSmall, uInf), x, logLarge);
+  rewriter.replaceOp(op, approximation);
+  return success();
+}
+
 //----------------------------------------------------------------------------//
 // Exp approximation.
 //----------------------------------------------------------------------------//
@@ -534,5 +581,5 @@ ExpApproximation::matchAndRewrite(math::ExpOp op,
 void mlir::populateMathPolynomialApproximationPatterns(
     RewritePatternSet &patterns) {
   patterns.add<TanhApproximation, LogApproximation, Log2Approximation,
-               ExpApproximation>(patterns.getContext());
+               Log1pApproximation, ExpApproximation>(patterns.getContext());
 }

diff  --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index 5e3b3098cfac..5a54d4a7f10a 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -10,7 +10,8 @@ func @scalar(%arg0: f32) -> f32 {
   // CHECK-NOT: log
   %1 = math.log %0 : f32
   %2 = math.log2 %1 : f32
-  return %2 : f32
+  %3 = math.log1p %2 : f32
+  return %3 : f32
 }
 
 // CHECK-LABEL: @vector
@@ -20,7 +21,8 @@ func @vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   // CHECK-NOT: log
   %1 = math.log %0 : vector<8xf32>
   %2 = math.log2 %1 : vector<8xf32>
-  return %2 : vector<8xf32>
+  %3 = math.log1p %2 : vector<8xf32>
+  return %3 : vector<8xf32>
 }
 
 // CHECK-LABEL: @exp_scalar

diff  --git a/mlir/test/mlir-cpu-runner/math_polynomial_approx.mlir b/mlir/test/mlir-cpu-runner/math_polynomial_approx.mlir
index 02fc5241f452..558bc949283a 100644
--- a/mlir/test/mlir-cpu-runner/math_polynomial_approx.mlir
+++ b/mlir/test/mlir-cpu-runner/math_polynomial_approx.mlir
@@ -110,6 +110,45 @@ func @log2() {
   return
 }
 
+func @log1p() {  // Prints math.log1p of scalar and vector f32 constants.
+  // CHECK: 0.00995033
+  %0 = constant 0.01 : f32
+  %1 = math.log1p %0 : f32
+  vector.print %1 : f32
+
+  // CHECK: -4.60517, -0.693147, 0, 1.38629
+  %2 = constant dense<[-0.99, -0.5, 0.0, 3.0]> : vector<4xf32>
+  %3 = math.log1p %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+
+  // CHECK: 0.0953102, 0.182322, 0.262364, 0.336472, 0.405465, 0.470004, 0.530628, 0.587787
+  %4 = constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
+  %5 = math.log1p %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+
+  // CHECK: -inf
+  %neg_one = constant -1.0 : f32
+  %log_neg_one = math.log1p %neg_one : f32
+  vector.print %log_neg_one : f32
+
+  // CHECK: nan
+  %neg_two = constant -2.0 : f32
+  %log_neg_two = math.log1p %neg_two : f32
+  vector.print %log_neg_two : f32
+
+  // CHECK: inf
+  %inf = constant 0x7f800000 : f32  // f32 bit pattern of +infinity.
+  %log_inf = math.log1p %inf : f32
+  vector.print %log_inf : f32
+
+  // CHECK: -inf, nan, inf, 9.99995e-06
+  %special_vec = constant dense<[-1.0, -1.1, 0x7f800000, 0.00001]> : vector<4xf32>
+  %log_special_vec = math.log1p %special_vec : vector<4xf32>
+  vector.print %log_special_vec : vector<4xf32>
+
+  return
+}
+
 // -------------------------------------------------------------------------- //
 // Exp.
 // -------------------------------------------------------------------------- //
@@ -151,6 +190,7 @@ func @main() {
   call @tanh(): () -> ()
   call @log(): () -> ()
   call @log2(): () -> ()
+  call @log1p(): () -> ()
   call @exp(): () -> ()
   return
 }