[llvm] [SROA] Propagate no-signed-zeros(nsz) fast-math flag on the phi node using function attribute (PR #83381)
Sushant Gokhale via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 05:50:25 PDT 2024
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/83381
>From 8c7d119f11b0d73036832e9b0356856ded404016 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Thu, 30 May 2024 15:19:22 +0530
Subject: [PATCH] [SROA] Propagate no-signed-zeros(nsz) fast-math flag on the
phi node using function attribute
The sequence `return X > 0.0 ? X : -X`, compiled with -Ofast, is expected to produce the fabs intrinsic. However, LLVM is currently unable to do so.
The above sequence goes through the following transformation during the pass pipeline:
The SROA pass generates the phi node. Here, it does not infer the fast-math flags on the phi node, unlike what the clang frontend typically does.
The phi node eventually gets translated into a select instruction.
Because the no-signed-zeros (nsz) fast-math flag is missing on the select instruction, the InstCombine pass fails to fold the sequence into the fabs intrinsic.
This patch, as part of SROA, propagates the nsz fast-math flag onto the phi node based on the function attribute, which enables this fold.
Co-authored-by: Sushant Gokhale <sgokhale at nvidia.com>
---
.../Utils/PromoteMemoryToRegister.cpp | 12 +++
llvm/test/Transforms/PhaseOrdering/fabs.ll | 24 ++++++
.../SROA/propagate-fast-math-flags-on-phi.ll | 79 +++++++++++++++++++
3 files changed, 115 insertions(+)
create mode 100644 llvm/test/Transforms/PhaseOrdering/fabs.ll
create mode 100644 llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 40d0f6b75d69b..bba9f5ff986c9 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
@@ -1113,6 +1114,17 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
+ // For the sequence `return X > 0.0 ? X : -X`, it is expected that this
+ // results in the fabs intrinsic. However, without the no-signed-zeros
+ // (nsz) flag on the phi node generated at this stage, the fabs fold does
+ // not happen. So, we infer the nsz flag from the function attribute to
+ // enable this fold.
+ if (APN->isComplete() && isa<FPMathOperator>(APN) &&
+ BB->getParent()
+ ->getFnAttribute("no-signed-zeros-fp-math")
+ .getValueAsBool())
+ APN->setHasNoSignedZeros(true);
+
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
diff --git a/llvm/test/Transforms/PhaseOrdering/fabs.ll b/llvm/test/Transforms/PhaseOrdering/fabs.ll
new file mode 100644
index 0000000000000..03fb24e2539d1
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/fabs.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
+
+define double @fabs_fcmp_olt_nsz_func_attr(double %0, double %1) "no-signed-zeros-fp-math"="true" {
+; CHECK-LABEL: define double @fabs_fcmp_olt_nsz_func_attr(
+; CHECK-SAME: double [[TMP0:%.*]], double [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[X_0:%.*]] = tail call nnan nsz double @llvm.fabs.f64(double [[TMP0]])
+; CHECK-NEXT: ret double [[X_0]]
+entry:
+ %x = alloca double
+ store double %0, ptr %x
+ %cmp = fcmp nnan nsz olt double %0, 0.000000e+00
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %fneg = fneg nnan nsz double %0
+ store double %fneg, ptr %x
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %ret = load double, ptr %x
+ ret double %ret
+}
diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
new file mode 100644
index 0000000000000..2cc26363daf9c
--- /dev/null
+++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s
+define double @phi_with_nsz(double %x) "no-signed-zeros-fp-math"="true" {
+; CHECK-LABEL: define double @phi_with_nsz(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret double [[X_ADDR_0]]
+entry:
+ %x.addr = alloca double
+ %cmp = fcmp olt double %x, 0.0
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %fneg = fneg double %x
+ store double %fneg, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load double, ptr %x.addr
+ ret double %retval
+}
+
+define <2 x double> @vector_phi_with_nsz(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-signed-zeros-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_nsz(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
+entry:
+ %x.addr = alloca <2 x double>
+ store <2 x double> %a, ptr %x.addr
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ store <2 x double> %b, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load <2 x double>, ptr %x.addr
+ ret <2 x double> %retval
+}
+
+define double @phi_without_nsz(double %x) "no-signed-zeros-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_nsz(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret double [[X_ADDR_0]]
+entry:
+ %x.addr = alloca double
+ %cmp = fcmp olt double %x, 0.0
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %fneg = fneg double %x
+ store double %fneg, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load double, ptr %x.addr
+ ret double %retval
+}
More information about the llvm-commits
mailing list