[clang] [llvm] [Transforms][Utils][PromoteMem2Reg] Propagate nnan flag on par with the nsz flag (PR #114271)

Tue Nov 5 13:24:47 PST 2024

https://github.com/pawosm-arm updated https://github.com/llvm/llvm-project/pull/114271

>From 45a02d4dd6102bf825943e738a15adf64361f976 Mon Sep 17 00:00:00 2001
From: Pawel Osmialowski <pawel.osmialowski at arm.com>
Date: Wed, 30 Oct 2024 14:30:08 +0000
Subject: [PATCH] [Transforms][Utils][PromoteMem2Reg] Propagate nnan and ninf
 flags on par with the nsz flag

Following the change introduced by the PR #83381, this patch extends
it with the same treatment of the nnan and ninf fast-math flags.

This is to address the performance drop caused by PR#83200 which
prevented vital InstCombine transformation due to the lack of the
relevant fast-math flag.

The PromoteMem2Reg utility is used by the SROA pass, where Phi nodes
are being created. Proposed change allows propagation of the nnan and
ninf flags down to these Phi nodes.
---
 clang/test/Headers/__clang_hip_math.hip       |   8 +-
 .../Utils/PromoteMemoryToRegister.cpp         |  33 ++++-
 .../SROA/propagate-fast-math-flags-on-phi.ll  | 134 ++++++++++++++++++
 3 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index e4254d1e64bec9..f64165e2e4bc62 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1727,7 +1727,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
 // FINITEONLY:       _ZL3jnfif.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret float [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_jnf(
@@ -1830,7 +1830,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
 // FINITEONLY:       _ZL2jnid.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret double [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_jn(
@@ -4461,7 +4461,7 @@ extern "C" __device__ double test_y1(double x) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
 // FINITEONLY:       _ZL3ynfif.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret float [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_ynf(
@@ -4564,7 +4564,7 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // FINITEONLY-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
 // FINITEONLY-NEXT:    br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
 // FINITEONLY:       _ZL2ynid.exit:
-// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
+// FINITEONLY-NEXT:    [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
 // FINITEONLY-NEXT:    ret double [[RETVAL_0_I]]
 //
 // APPROX-LABEL: @test_yn(
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 656bb1ebd1161e..aa3f825265f816 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -394,6 +394,12 @@ struct PromoteMem2Reg {
   /// Whether the function has the no-signed-zeros-fp-math attribute set.
   bool NoSignedZeros = false;
 
+  /// Whether the function has the no-nans-fp-math attribute set.
+  bool NoNaNs = false;
+
+  /// Whether the function has the no-infs-fp-math attribute set.
+  bool NoInfs = false;
+
 public:
   PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                  AssumptionCache *AC)
@@ -752,6 +758,8 @@ void PromoteMem2Reg::run() {
   ForwardIDFCalculator IDF(DT);
 
   NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+  NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool();
+  NoInfs = F.getFnAttribute("no-infs-fp-math").getValueAsBool();
 
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
@@ -1132,13 +1140,24 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
         for (unsigned i = 0; i != NumEdges; ++i)
           APN->addIncoming(IncomingVals[AllocaNo], Pred);
 
-        // For the  sequence `return X > 0.0 ? X : -X`, it is expected that this
-        // results in fabs intrinsic. However, without no-signed-zeros(nsz) flag
-        // on the phi node generated at this stage, fabs folding does not
-        // happen. So, we try to infer nsz flag from the function attributes to
-        // enable this fabs folding.
-        if (isa<FPMathOperator>(APN) && NoSignedZeros)
-          APN->setHasNoSignedZeros(true);
+        if (isa<FPMathOperator>(APN)) {
+          // For the sequence `return X > 0.0 ? X : -X`, it is expected that
+          // this results in fabs intrinsic. However, without
+          // no-signed-zeros(nsz) flag on the phi node generated at this stage,
+          // fabs folding does not happen. So, we try to infer nsz flag from the
+          // function attributes to enable this fabs folding.
+          if (NoSignedZeros)
+            APN->setHasNoSignedZeros(true);
+
+          // This allows select instruction folding relevant to floating point
+          // reductions whose operand is a PHI.
+          if (NoNaNs)
+            APN->setHasNoNaNs(true);
+
+          // Handle NoInfs flag too.
+          if (NoInfs)
+            APN->setHasNoInfs(true);
+        }
 
         // The currently active variable for this block is now the PHI.
         IncomingVals[AllocaNo] = APN;
diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
index 2cc26363daf9c5..c669586b4423e1 100644
--- a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
+++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
@@ -77,3 +77,137 @@ return:                          ; preds = %entry,%if.then
   %retval = load double, ptr %x.addr
   ret double %retval
 }
+
+define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define double @phi_with_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}
+
+define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret <2 x double> [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca <2 x double>
+  store <2 x double> %a, ptr %x.addr
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  store <2 x double> %b, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load <2 x double>, ptr %x.addr
+  ret <2 x double> %retval
+}
+
+define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}
+
+define <2 x double> @vector_phi_with_ninf(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-infs-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_ninf(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi ninf <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret <2 x double> [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca <2 x double>
+  store <2 x double> %a, ptr %x.addr
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  store <2 x double> %b, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load <2 x double>, ptr %x.addr
+  ret <2 x double> %retval
+}
+
+define double @phi_without_ninf(double %x) "no-infs-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_ninf(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}