[llvm] [InstCombine] Fold more 'fcmp' 'select' instrs idioms into 'fabs' (PR #83381)

Tue Apr 30 02:26:42 PDT 2024

https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/83381

>From b6429e60da11cd30d47f03dc593c998706c59b24 Mon Sep 17 00:00:00 2001
From: Yashwant Singh <yashwants at nvidia.com>
Date: Fri, 23 Feb 2024 12:36:14 +0530
Subject: [PATCH 1/2] [InstCombine] Fold more 'fcmp' 'select' instrs idioms
 into 'fabs'

Based on these C/C++ patterns when compiled with 'Ofast'
return X > 0.0 ? X : -X;
return X < 0.0 ? -X : X;

InstCombine tries to propagate FMF to 'select' instructions before attempting
a fold, but it can't safely propagate 'nsz', hence it wasn't performing the optimization.
OTOH, we should be able to do this optimization at 'Ofast', the same as
gcc (https://godbolt.org/z/c69fe5fa6).

This is a bit of a workaround, but this patch allows us to query the
"no-signed-zeros-fp-math" function attribute added to the function during
'Ofast' compilation, allowing InstCombine to safely match the idiom.
---
 .../InstCombine/InstCombineSelect.cpp         |  6 ++++-
 llvm/test/Transforms/InstCombine/fabs.ll      | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 71fa9b9ba41ebb..bd56d92d5d3d0f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2742,7 +2742,11 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
     // Note: We require "nnan" for this fold because fcmp ignores the signbit
     //       of NAN, but IEEE-754 specifies the signbit of NAN values with
     //       fneg/fabs operations.
-    if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())
+    if (!SI.hasNoNaNs())
+      return nullptr;
+
+    bool functionHasNoSignedZeroes = SI.getParent()->getParent()->hasFnAttribute("no-signed-zeros-fp-math");
+    if(!functionHasNoSignedZeroes && !SI.hasNoSignedZeros())
       return nullptr;
 
     if (Swap)
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index 7e380c2e4590a0..88b02a852f3d74 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -547,6 +547,20 @@ define double @select_fcmp_nnan_nsz_ult_zero_unary_fneg(double %x) {
   ret double %fabs
 }
 
+
+define float @absfloat32f_olt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @absfloat32f_olt_fast_no_signed_zeroes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[RETVAL_0]]
+;
+entry:
+  %cmp = fcmp fast olt float %x, 0.000000e+00
+  %fneg = fneg fast float %x
+  %retval.0 = select i1 %cmp, float %fneg, float %x
+  ret float %retval.0
+}
+
 ; X < -0.0 ? -X : X --> fabs(X)
 
 define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
@@ -839,6 +853,19 @@ define <2 x float> @select_fcmp_nnan_nsz_ugt_zero_unary_fneg(<2 x float> %x) {
   ret <2 x float> %fabs
 }
 
+define float @absfloat32f_ogt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @absfloat32f_ogt_fast_no_signed_zeroes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[RETVAL_0]]
+;
+entry:
+  %cmp = fcmp fast ogt float %x, 0.000000e+00
+  %fneg = fneg fast float %x
+  %retval.0 = select i1 %cmp, float %x, float %fneg
+  ret float %retval.0
+}
+
 ; X > -0.0 ? X : (0.0 - X) --> fabs(X)
 
 define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {

>From a803cf98bc1e812846258c1713df7ecde5bf5e51 Mon Sep 17 00:00:00 2001
From: Yashwant Singh <yashwants at nvidia.com>
Date: Fri, 1 Mar 2024 10:16:18 +0530
Subject: [PATCH 2/2] Added no attribute tests, syntax and formatting

---
 .../InstCombine/InstCombineSelect.cpp         |  8 +--
 llvm/test/Transforms/InstCombine/fabs.ll      | 63 ++++++++++++++-----
 2 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bd56d92d5d3d0f..67c3bcc1335f9f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2742,11 +2742,11 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
     // Note: We require "nnan" for this fold because fcmp ignores the signbit
     //       of NAN, but IEEE-754 specifies the signbit of NAN values with
     //       fneg/fabs operations.
-    if (!SI.hasNoNaNs())
-      return nullptr;
+    bool SIHasNoSignedZeros =
+        SI.hasNoSignedZeros() ||
+        (SI.getFunction()->hasFnAttribute("no-signed-zeros-fp-math"));
 
-    bool functionHasNoSignedZeroes = SI.getParent()->getParent()->hasFnAttribute("no-signed-zeros-fp-math");
-    if(!functionHasNoSignedZeroes && !SI.hasNoSignedZeros())
+    if (!SI.hasNoNaNs() || !SIHasNoSignedZeros)
       return nullptr;
 
     if (Swap)
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index 88b02a852f3d74..575a794bb9c6b5 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -548,17 +548,32 @@ define double @select_fcmp_nnan_nsz_ult_zero_unary_fneg(double %x) {
 }
 
 
-define float @absfloat32f_olt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
-; CHECK-LABEL: @absfloat32f_olt_fast_no_signed_zeroes(
+define float @fcmp_olt_select_nsz_func_attr(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @fcmp_olt_select_nsz_func_attr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    ret float [[RETVAL_0]]
+; CHECK-NEXT:    [[FABS:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[FABS]]
+;
+entry:
+  %fcmp = fcmp nnan nsz olt float %x, 0.000000e+00
+  %fneg = fneg float %x
+  %fabs = select nnan i1 %fcmp, float %fneg, float %x
+  ret float %fabs
+}
+
+define float @fcmp_olt_select_no_nsz(float %x) {
+; CHECK-LABEL: @fcmp_olt_select_no_nsz(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FCMP:%.*]] = fcmp nnan nsz olt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT:    [[NOFABS:%.*]] = select nnan i1 [[FCMP]], float [[FNEG]], float [[X]]
+; CHECK-NEXT:    ret float [[NOFABS]]
 ;
 entry:
-  %cmp = fcmp fast olt float %x, 0.000000e+00
-  %fneg = fneg fast float %x
-  %retval.0 = select i1 %cmp, float %fneg, float %x
-  ret float %retval.0
+  %fcmp = fcmp nnan nsz olt float %x, 0.000000e+00
+  %fneg = fneg float %x
+  %nofabs = select nnan i1 %fcmp, float %fneg, float %x
+  ret float %nofabs
 }
 
 ; X < -0.0 ? -X : X --> fabs(X)
@@ -853,19 +868,35 @@ define <2 x float> @select_fcmp_nnan_nsz_ugt_zero_unary_fneg(<2 x float> %x) {
   ret <2 x float> %fabs
 }
 
-define float @absfloat32f_ogt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
-; CHECK-LABEL: @absfloat32f_ogt_fast_no_signed_zeroes(
+define float @fcmp_ogt_select_nsz_func_attr(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @fcmp_ogt_select_nsz_func_attr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    ret float [[RETVAL_0]]
+; CHECK-NEXT:    [[FABS:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[FABS]]
 ;
 entry:
-  %cmp = fcmp fast ogt float %x, 0.000000e+00
-  %fneg = fneg fast float %x
-  %retval.0 = select i1 %cmp, float %x, float %fneg
-  ret float %retval.0
+  %fcmp = fcmp nnan nsz ogt float %x, 0.000000e+00
+  %fneg = fneg float %x
+  %fabs = select nnan i1 %fcmp, float %x, float %fneg
+  ret float %fabs
 }
 
+define float @fcmp_ogt_select_no_nsz(float %x) {
+; CHECK-LABEL: @fcmp_ogt_select_no_nsz(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FCMP:%.*]] = fcmp nnan nsz ogt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    [[FNEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT:    [[NOFABS:%.*]] = select nnan i1 [[FCMP]], float [[X]], float [[FNEG]]
+; CHECK-NEXT:    ret float [[NOFABS]]
+;
+entry:
+  %fcmp = fcmp nnan nsz ogt float %x, 0.000000e+00
+  %fneg = fneg float %x
+  %nofabs = select nnan i1 %fcmp, float %x, float %fneg
+  ret float %nofabs
+}
+
+
 ; X > -0.0 ? X : (0.0 - X) --> fabs(X)
 
 define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {