[llvm] [InstCombine] Fold fmod to frem if we know it does not set errno. (PR #107912)

Tue Sep 10 01:37:37 PDT 2024

https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/107912

>From 1f9640839fce81143a143cf03ce5576365217a07 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 9 Sep 2024 17:31:30 +0100
Subject: [PATCH 1/2] [InstCombine] Test for fmod -> frem folding. NFC

---
 llvm/test/Transforms/InstCombine/fmod.ll | 104 +++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/fmod.ll

diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll
new file mode 100644
index 00000000000000..80c42d546e6c9c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmod.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define float @test_inf_const(float %f) { ; dividend is checked for +/-inf before the call
+; CHECK-LABEL: define float @test_inf_const(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ABS:%.*]] = tail call float @llvm.fabs.f32(float [[F]])
+; CHECK-NEXT:    [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT:    ret float [[CALL]]
+; CHECK:       return:
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %abs = tail call float @llvm.fabs.f32(float %f)
+  %isinf = fcmp oeq float %abs, 0x7FF0000000000000 ; 0x7FF0000000000000 is +inf, so true iff %f is +/-inf
+  br i1 %isinf, label %return, label %if.end
+
+if.end:
+  %call = tail call float @fmodf(float %f, float 2.0) ; only reached when %isinf is false; divisor is the non-zero constant 2.0
+  ret float %call
+
+return:
+  ret float 0.0
+}
+
+define float @test_const_zero(float %f) { ; divisor is checked against zero before the call
+; CHECK-LABEL: define float @test_const_zero(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]])
+; CHECK-NEXT:    ret float [[CALL]]
+; CHECK:       return:
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %iszero = fcmp oeq float %f, 0.0 ; ordered compare: true for +0.0 and -0.0, false for NaN
+  br i1 %iszero, label %return, label %if.end
+
+if.end:
+  %call = tail call float @fmodf(float 2.0, float %f) ; only reached when %iszero is false; dividend is the finite constant 2.0
+  ret float %call
+
+return:
+  ret float 0.0
+}
+
+define float @test_unknown_const(float %f) { ; negative test: nothing constrains %f, the call is expected to stay
+; CHECK-LABEL: define float @test_unknown_const(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT:    ret float [[CALL]]
+;
+entry:
+  %call = tail call float @fmodf(float %f, float 2.000000e+00) ; %f is unconstrained here, so it may be +/-inf
+  ret float %call
+}
+
+define float @test_noinf_nozero(float nofpclass(inf) %f, float nofpclass(zero) %g) { ; operand facts come from nofpclass parameter attributes
+; CHECK-LABEL: define float @test_noinf_nozero(
+; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+entry:
+  %call = tail call nnan float @fmodf(float %f, float %g) ; the call carries the nnan fast-math flag
+  ret float %call
+}
+
+define double @test_double(double nofpclass(inf) %f, double nofpclass(zero) %g) { ; double variant calling @fmod, no fast-math flags
+; CHECK-LABEL: define double @test_double(
+; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+entry:
+  %call = tail call double @fmod(double %f, double %g) ; dividend is nofpclass(inf), divisor is nofpclass(zero)
+  ret double %call
+}
+
+define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) { ; fp128 variant calling @fmodl, no fast-math flags
+; CHECK-LABEL: define fp128 @test_fp128(
+; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]])
+; CHECK-NEXT:    ret fp128 [[CALL]]
+;
+entry:
+  %call = tail call fp128 @fmodl(fp128 %f, fp128 %g) ; dividend is nofpclass(inf), divisor is nofpclass(zero)
+  ret fp128 %call
+}
+
+declare float @fmodf(float, float)
+declare double @fmod(double, double)
+declare fp128 @fmodl(fp128, fp128)

>From a78d378251498540aa23be907bd80afcdd4b794f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 10 Sep 2024 09:32:33 +0100
Subject: [PATCH 2/2] [InstCombine] Fold fmod to frem if we know it does not
 set errno.

fmod is folded to frem by clang under -fno-math-errno, and can be constant
folded in LLVM if the operands are known. It is relatively common for
floating-point code to handle special values before doing a calculation:
```
if (isnan(f))
  return handlenan;
if (isinf(f))
  return handleinf;
..
fmod(f, 2.0)
```

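This patch enables folding fmod to frem in InstCombine (via
LibCallSimplifier) when the first parameter is known not to be inf and the
second is known not to be zero. fmod only sets errno when the first parameter
is inf or the second is zero, so with both cases excluded the call cannot set
errno.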
---
 .../llvm/Transforms/Utils/SimplifyLibCalls.h  |  8 +++-
 .../InstCombine/InstCombineCalls.cpp          |  4 +-
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 37 +++++++++++++++----
 llvm/test/Transforms/InstCombine/fmod.ll      | 10 ++---
 4 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 43b5c9250a8908..2d3d2ada6183a7 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -22,6 +22,8 @@ class AssumptionCache;
 class StringRef;
 class Value;
 class CallInst;
+class DominatorTree;
+class DomConditionCache;
 class DataLayout;
 class Instruction;
 class IRBuilderBase;
@@ -103,6 +105,8 @@ class LibCallSimplifier {
   FortifiedLibCallSimplifier FortifiedSimplifier;
   const DataLayout &DL;
   const TargetLibraryInfo *TLI;
+  DominatorTree *DT;
+  DomConditionCache *DC;
   AssumptionCache *AC;
   OptimizationRemarkEmitter &ORE;
   BlockFrequencyInfo *BFI;
@@ -136,7 +140,8 @@ class LibCallSimplifier {
 
 public:
   LibCallSimplifier(
-      const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,
+      const DataLayout &DL, const TargetLibraryInfo *TLI, DominatorTree *DT,
+      DomConditionCache *DC, AssumptionCache *AC,
       OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
       ProfileSummaryInfo *PSI,
       function_ref<void(Instruction *, Value *)> Replacer =
@@ -201,6 +206,7 @@ class LibCallSimplifier {
   Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
   Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
+  Value *optimizeFMod(CallInst *CI, IRBuilderBase &B);
   Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
   Value *optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index eb94e894b57b06..61011d55227e7b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3711,8 +3711,8 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   auto InstCombineErase = [this](Instruction *I) {
     eraseInstFromFunction(*I);
   };
-  LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
-                               InstCombineErase);
+  LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
+                               InstCombineRAUW, InstCombineErase);
   if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
     ++NumSimplified;
     return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1e6dc88ed93532..2ad48a8eb3f93f 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2278,8 +2278,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
   // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
   // errno), but sqrt(-Inf) is required by various standards to set errno.
   if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
-      !isKnownNeverInfinity(Base, 0,
-                            SimplifyQuery(DL, TLI, /*DT=*/nullptr, AC, Pow)))
+      !isKnownNeverInfinity(
+          Base, 0, SimplifyQuery(DL, TLI, DT, AC, Pow, true, true, DC)))
     return nullptr;
 
   Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
@@ -2796,6 +2796,25 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
   return copyFlags(*CI, FabsCall);
 }
 
+Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) {
+  // fmod(x, y) only sets errno when x is +/-inf or y is zero. A subnormal y
+  // counts as zero if the function's denormal mode may flush inputs.
+  SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC);
+  Value *X = CI->getOperand(0), *Y = CI->getOperand(1);
+  if (!computeKnownFPClass(X, fcInf, /*Depth=*/0, SQ).isKnownNeverInfinity())
+    return nullptr;
+  KnownFPClass KnownY =
+      computeKnownFPClass(Y, fcZero | fcSubnormal, /*Depth=*/0, SQ);
+  if (!KnownY.isKnownNeverLogicalZero(*CI->getFunction(), CI->getType()))
+    return nullptr;
+
+  // Emit frem with the call's fast-math flags. substituteInParent() already
+  // replaces the call, so nullptr is returned instead of a replacement value.
+  Value *FRem = B.CreateFRemFMF(X, Y, CI);
+  substituteInParent(CI, FRem);
+  return nullptr;
+}
+
 Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
                                                      IRBuilderBase &B) {
   Module *M = CI->getModule();
@@ -3945,6 +3964,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_sqrt:
   case LibFunc_sqrtl:
     return optimizeSqrt(CI, Builder);
+  case LibFunc_fmod:
+  case LibFunc_fmodf:
+  case LibFunc_fmodl:
+    return optimizeFMod(CI, Builder);
   case LibFunc_logf:
   case LibFunc_log:
   case LibFunc_logl:
@@ -4162,13 +4185,13 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
 }
 
 LibCallSimplifier::LibCallSimplifier(
-    const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,
-    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
-    ProfileSummaryInfo *PSI,
+    const DataLayout &DL, const TargetLibraryInfo *TLI, DominatorTree *DT,
+    DomConditionCache *DC, AssumptionCache *AC, OptimizationRemarkEmitter &ORE,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
     function_ref<void(Instruction *, Value *)> Replacer,
     function_ref<void(Instruction *)> Eraser)
-    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), AC(AC), ORE(ORE), BFI(BFI),
-      PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
+    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), DT(DT), DC(DC), AC(AC),
+      ORE(ORE), BFI(BFI), PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
 
 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
   // Indirect through the replacer used in this instance.
diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll
index 80c42d546e6c9c..d62976c4b2dd9b 100644
--- a/llvm/test/Transforms/InstCombine/fmod.ll
+++ b/llvm/test/Transforms/InstCombine/fmod.ll
@@ -9,7 +9,7 @@ define float @test_inf_const(float %f) {
 ; CHECK-NEXT:    [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000
 ; CHECK-NEXT:    br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT:    [[CALL:%.*]] = frem float [[F]], 2.000000e+00
 ; CHECK-NEXT:    ret float [[CALL]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret float 0.000000e+00
@@ -34,7 +34,7 @@ define float @test_const_zero(float %f) {
 ; CHECK-NEXT:    [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00
 ; CHECK-NEXT:    br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]])
+; CHECK-NEXT:    [[CALL:%.*]] = frem float 2.000000e+00, [[F]]
 ; CHECK-NEXT:    ret float [[CALL]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret float 0.000000e+00
@@ -67,7 +67,7 @@ define float @test_noinf_nozero(float nofpclass(inf) %f, float nofpclass(zero) %
 ; CHECK-LABEL: define float @test_noinf_nozero(
 ; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT:    [[CALL:%.*]] = frem nnan float [[F]], [[G]]
 ; CHECK-NEXT:    ret float [[CALL]]
 ;
 entry:
@@ -79,7 +79,7 @@ define double @test_double(double nofpclass(inf) %f, double nofpclass(zero) %g)
 ; CHECK-LABEL: define double @test_double(
 ; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]])
+; CHECK-NEXT:    [[CALL:%.*]] = frem double [[F]], [[G]]
 ; CHECK-NEXT:    ret double [[CALL]]
 ;
 entry:
@@ -91,7 +91,7 @@ define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) {
 ; CHECK-LABEL: define fp128 @test_fp128(
 ; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]])
+; CHECK-NEXT:    [[CALL:%.*]] = frem fp128 [[F]], [[G]]
 ; CHECK-NEXT:    ret fp128 [[CALL]]
 ;
 entry: