[llvm] [Transforms][Utils][PromoteMem2Reg] Propagate nnan flag on par with the nsz flag (PR #114271)

Paul Osmialowski via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 10:16:38 PDT 2024


https://github.com/pawosm-arm created https://github.com/llvm/llvm-project/pull/114271

Following the change introduced by the PR #83381, this patch extends it with the same treatment of the nnan fast-math flag. This is to address the performance drop caused by PR#83200 which prevented vital InstCombine transformation due to the lack of relevant fast-math flags.

The PromoteMem2Reg utility is used by the SROA pass, where Phi nodes are being created. Proposed change allows propagation of the nnan flag down to these Phi nodes.

>From 234b4c33279a3c24824e45535e6b78de93a2234c Mon Sep 17 00:00:00 2001
From: Pawel Osmialowski <pawel.osmialowski at arm.com>
Date: Wed, 30 Oct 2024 14:30:08 +0000
Subject: [PATCH] [Transforms][Utils][PromoteMem2Reg] Propagate nnan flag on
 par with the nsz flag

Following the change introduced by the PR #83381, this patch extends
it with the same treatment of the nnan fast-math flag. This is to
address the performance drop caused by PR#83200 which prevented vital
InstCombine transformation due to the lack of relevant fast-math
flags.

The PromoteMem2Reg utility is used by the SROA pass, where Phi nodes
are being created. Proposed change allows propagation of the nnan
flag down to these Phi nodes.
---
 .../Utils/PromoteMemoryToRegister.cpp         |  9 +++
 .../SROA/propagate-fast-math-flags-on-phi.ll  | 78 +++++++++++++++++++
 2 files changed, 87 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 656bb1ebd1161e..8a42bdddb08119 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -394,6 +394,9 @@ struct PromoteMem2Reg {
   /// Whether the function has the no-signed-zeros-fp-math attribute set.
   bool NoSignedZeros = false;
 
+  /// Whether the function has the no-nans-fp-math attribute set.
+  bool NoNaNs = false;
+
 public:
   PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                  AssumptionCache *AC)
@@ -752,6 +755,7 @@ void PromoteMem2Reg::run() {
   ForwardIDFCalculator IDF(DT);
 
   NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+  NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool();
 
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
@@ -1140,6 +1144,11 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
         if (isa<FPMathOperator>(APN) && NoSignedZeros)
           APN->setHasNoSignedZeros(true);
 
+        // This allows select instruction folding relevant to floating point
+        // reductions whose operand is a PHI.
+        if (isa<FPMathOperator>(APN) && NoNaNs)
+          APN->setHasNoNaNs(true);
+
         // The currently active variable for this block is now the PHI.
         IncomingVals[AllocaNo] = APN;
         AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
index 2cc26363daf9c5..4eda5108b7aba4 100644
--- a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
+++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
@@ -77,3 +77,81 @@ return:                          ; preds = %entry,%if.then
   %retval = load double, ptr %x.addr
   ret double %retval
 }
+
+define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define double @phi_with_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}
+
+define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret <2 x double> [[X_ADDR_0]]
+entry:
+  %x.addr = alloca <2 x double>
+  store <2 x double> %a, ptr %x.addr
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  store <2 x double> %b, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load <2 x double>, ptr %x.addr
+  ret <2 x double> %retval
+}
+
+define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[X_ADDR_0]]
+entry:
+  %x.addr = alloca double
+  %cmp = fcmp olt double %x, 0.0
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                         ; preds = %entry
+  %fneg = fneg double %x
+  store double %fneg, ptr %x.addr
+  br label %return
+
+return:                          ; preds = %entry,%if.then
+  %retval = load double, ptr %x.addr
+  ret double %retval
+}



More information about the llvm-commits mailing list