[clang] [llvm] [Transforms][Utils][PromoteMem2Reg] Propagate nnan flag on par with the nsz flag (PR #114271)

Paul Osmialowski via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 5 07:49:38 PST 2024


https://github.com/pawosm-arm updated https://github.com/llvm/llvm-project/pull/114271

>From a426e17ed3c3cad6b3c813bd996e649a2e544591 Mon Sep 17 00:00:00 2001
From: Pawel Osmialowski <pawel.osmialowski at arm.com>
Date: Wed, 30 Oct 2024 14:30:08 +0000
Subject: [PATCH] [Transforms][Utils][PromoteMem2Reg] Propagate nnan and ninf
 flags on par with the nsz flag

Following the change introduced by the PR #83381, this patch extends
it with the same treatment of the nnan and ninf fast-math flags.

This is to address the performance drop caused by PR#83200 which
prevented vital InstCombine transformation due to the lack of the
relevant fast-math flag.

The PromoteMem2Reg utility is used by the SROA pass, where Phi nodes
are being created. Proposed change allows propagation of the nnan and
ninf flags down to these Phi nodes.
---
 clang/test/Headers/__clang_hip_math.hip       |   8 +-
 .../Utils/PromoteMemoryToRegister.cpp         |  17 +++
 .../SROA/propagate-fast-math-flags-on-phi.ll  | 134 ++++++++++++++++++
 3 files changed, 155 insertions(+), 4 deletions(-)

diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index e4254d1e64bec9..f64165e2e4bc62 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1727,7 +1727,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
 // FINITEONLY:       _ZL3jnfif.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret float [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_jnf(
@@ -1830,7 +1830,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
 // FINITEONLY:       _ZL2jnid.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret double [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_jn(
@@ -4461,7 +4461,7 @@ extern "C" __device__ double test_y1(double x) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
 // FINITEONLY:       _ZL3ynfif.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret float [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_ynf(
@@ -4564,7 +4564,7 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
 // FINITEONLY:       _ZL2ynid.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret double [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_yn(
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 656bb1ebd1161e..d8657de3e58b09 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -394,6 +394,12 @@ struct PromoteMem2Reg {
   /// Whether the function has the no-signed-zeros-fp-math attribute set.
   bool NoSignedZeros = false;
 
+  /// Whether the function has the no-nans-fp-math attribute set.
+  bool NoNaNs = false;
+
+  /// Whether the function has the no-infs-fp-math attribute set.
+  bool NoInfs = false;
+
 public:
   PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                  AssumptionCache *AC)
@@ -752,6 +758,8 @@ void PromoteMem2Reg::run() {
   ForwardIDFCalculator IDF(DT);
 
   NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+  NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool();
+  NoInfs = F.getFnAttribute("no-infs-fp-math").getValueAsBool();
 
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
@@ -1140,6 +1148,15 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
         if (isa<FPMathOperator>(APN) && NoSignedZeros)
           APN->setHasNoSignedZeros(true);
 
+        // This allows select instruction folding relevant to floating point
+        // reductions whose operand is a PHI.
+        if (isa<FPMathOperator>(APN) && NoNaNs)
+          APN->setHasNoNaNs(true);
+
+        // Handle NoInfs flag too.
+        if (isa<FPMathOperator>(APN) && NoInfs)
+          APN->setHasNoInfs(true);
+
         // The currently active variable for this block is now the PHI.
         IncomingVals[AllocaNo] = APN;
         AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
index 2cc26363daf9c5..c669586b4423e1 100644
--- a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
+++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
@@ -77,3 +77,137 @@ return:                          ; preds = %entry,%if.then
   %retval = load double, ptr %x.addr
   ret double %retval
 }
+
+define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define double @phi_with_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}
+
+define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret <2 x double> [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca <2 x double>
+  store <2 x double> %a, ptr %x.addr
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  store <2 x double> %b, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load <2 x double>, ptr %x.addr
+  ret <2 x double> %retval
+}
+
+define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}
+
+define <2 x double> @vector_phi_with_ninf(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-infs-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_ninf(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi ninf <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret <2 x double> [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca <2 x double>
+  store <2 x double> %a, ptr %x.addr
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  store <2 x double> %b, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load <2 x double>, ptr %x.addr
+  ret <2 x double> %retval
+}
+
+define double @phi_without_ninf(double %x) "no-infs-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_ninf(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}



More information about the cfe-commits mailing list