[llvm] [InstCombine] Fold more 'fcmp' 'select' instrs idioms into 'fabs' (PR #83381)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 15 22:32:13 PDT 2024
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/83381
>From b6429e60da11cd30d47f03dc593c998706c59b24 Mon Sep 17 00:00:00 2001
From: Yashwant Singh <yashwants at nvidia.com>
Date: Fri, 23 Feb 2024 12:36:14 +0530
Subject: [PATCH 1/2] [InstCombine] Fold more 'fcmp' 'select' instrs idioms
into 'fabs'
Based on these C/C++ patterns when compiled with 'Ofast'
return X > 0.0 ? X : -X;
return X < 0.0 ? -X : X;
InstCombine tries to propagate FMF to 'select' instructions before attempting
a fold, but it can't safely propagate 'nsz' and hence wasn't performing the optimization.
OTOH, we should be able to do this optimization at 'Ofast', same as
gcc (https://godbolt.org/z/c69fe5fa6).
This is a bit of a workaround, but this patch allows us to query the "no-signed-zeros-fp-math"
function attribute added to the function during 'Ofast' compilation,
allowing InstCombine to safely match the idiom.
---
.../InstCombine/InstCombineSelect.cpp | 6 ++++-
llvm/test/Transforms/InstCombine/fabs.ll | 27 +++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 71fa9b9ba41eb..bd56d92d5d3d0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2742,7 +2742,11 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
// Note: We require "nnan" for this fold because fcmp ignores the signbit
// of NAN, but IEEE-754 specifies the signbit of NAN values with
// fneg/fabs operations.
- if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())
+ if (!SI.hasNoNaNs())
+ return nullptr;
+
+ bool functionHasNoSignedZeroes = SI.getParent()->getParent()->hasFnAttribute("no-signed-zeros-fp-math");
+ if(!functionHasNoSignedZeroes && !SI.hasNoSignedZeros())
return nullptr;
if (Swap)
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index 7e380c2e4590a..88b02a852f3d7 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -547,6 +547,20 @@ define double @select_fcmp_nnan_nsz_ult_zero_unary_fneg(double %x) {
ret double %fabs
}
+
+define float @absfloat32f_olt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @absfloat32f_olt_fast_no_signed_zeroes(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[RETVAL_0]]
+;
+entry:
+ %cmp = fcmp fast olt float %x, 0.000000e+00
+ %fneg = fneg fast float %x
+ %retval.0 = select i1 %cmp, float %fneg, float %x
+ ret float %retval.0
+}
+
; X < -0.0 ? -X : X --> fabs(X)
define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
@@ -839,6 +853,19 @@ define <2 x float> @select_fcmp_nnan_nsz_ugt_zero_unary_fneg(<2 x float> %x) {
ret <2 x float> %fabs
}
+define float @absfloat32f_ogt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
+; CHECK-LABEL: @absfloat32f_ogt_fast_no_signed_zeroes(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[RETVAL_0]]
+;
+entry:
+ %cmp = fcmp fast ogt float %x, 0.000000e+00
+ %fneg = fneg fast float %x
+ %retval.0 = select i1 %cmp, float %x, float %fneg
+ ret float %retval.0
+}
+
; X > -0.0 ? X : (0.0 - X) --> fabs(X)
define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {
>From 0708a3578501f49976869cbb5d6e64a836662ab3 Mon Sep 17 00:00:00 2001
From: Yashwant Singh <yashwants at nvidia.com>
Date: Fri, 1 Mar 2024 10:16:18 +0530
Subject: [PATCH 2/2] Added no attribute tests, syntax and formatting
---
.../InstCombine/InstCombineSelect.cpp | 6 +-
.../Utils/PromoteMemoryToRegister.cpp | 8 +
.../Transforms/InstCombine/fabs-with-sroa.ll | 140 ++++++++++++++++++
llvm/test/Transforms/InstCombine/fabs.ll | 27 ----
4 files changed, 149 insertions(+), 32 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/fabs-with-sroa.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bd56d92d5d3d0..71fa9b9ba41eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2742,11 +2742,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
// Note: We require "nnan" for this fold because fcmp ignores the signbit
// of NAN, but IEEE-754 specifies the signbit of NAN values with
// fneg/fabs operations.
- if (!SI.hasNoNaNs())
- return nullptr;
-
- bool functionHasNoSignedZeroes = SI.getParent()->getParent()->hasFnAttribute("no-signed-zeros-fp-math");
- if(!functionHasNoSignedZeroes && !SI.hasNoSignedZeros())
+ if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())
return nullptr;
if (Swap)
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 88b05aab8db4d..b77369f78d7d1 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
@@ -1112,6 +1113,13 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
+ if (APN->isComplete() &&
+ APN->getFunction()->hasFnAttribute("no-signed-zeros-fp-math") &&
+ any_of(APN->incoming_values(),
+ [](Value *V) { return isa<FPMathOperator>(V); })) {
+ APN->setHasNoSignedZeros(true);
+ }
+
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
diff --git a/llvm/test/Transforms/InstCombine/fabs-with-sroa.ll b/llvm/test/Transforms/InstCombine/fabs-with-sroa.ll
new file mode 100644
index 0000000000000..2314fa7a8aa98
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fabs-with-sroa.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=aarch64-unknown-linux-gnu < %s -passes='sroa' -S > %t.ll
+; RUN: cat %t.ll | FileCheck %s --check-prefix=SROA
+; RUN: opt -mtriple=aarch64-unknown-linux-gnu < %t.ll \
+; RUN: -passes='simplifycfg,instcombine' -S \
+; RUN: | FileCheck %s --check-prefix=FABS
+
+define double @fabs_fcmp_olt_nsz_func_attr(double %x) "no-signed-zeros-fp-math" {
+; SROA-LABEL: define double @fabs_fcmp_olt_nsz_func_attr(
+; SROA-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; SROA-NEXT: entry:
+; SROA-NEXT: [[CMP:%.*]] = fcmp nnan nsz olt double [[X]], 0.000000e+00
+; SROA-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; SROA: if.then:
+; SROA-NEXT: [[FNEG:%.*]] = fneg nnan nsz double [[X]]
+; SROA-NEXT: br label [[RETURN:%.*]]
+; SROA: if.else:
+; SROA-NEXT: br label [[RETURN]]
+; SROA: return:
+; SROA-NEXT: [[RETVAL_0:%.*]] = phi nsz double [ [[FNEG]], [[IF_THEN]] ], [ [[X]], [[IF_ELSE]] ]
+; SROA-NEXT: ret double [[RETVAL_0]]
+;
+; FABS-LABEL: define double @fabs_fcmp_olt_nsz_func_attr(
+; FABS-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; FABS-NEXT: entry:
+; FABS-NEXT: [[RETVAL_0:%.*]] = call nnan nsz double @llvm.fabs.f64(double [[X]])
+; FABS-NEXT: ret double [[RETVAL_0]]
+entry:
+ %retval = alloca double
+ %x.addr = alloca double
+ store double %x, ptr %x.addr
+ %0 = load double, ptr %x.addr
+ %cmp = fcmp nnan nsz olt double %0, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %1 = load double, ptr %x.addr
+ %fneg = fneg nnan nsz double %1
+ store double %fneg, ptr %retval
+ br label %return
+
+if.else: ; preds = %entry
+ %2 = load double, ptr %x.addr
+ store double %2, ptr %retval
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %3 = load double, ptr %retval
+ ret double %3
+}
+
+define double @fabs_fcmp_oge_nsz_func_attr(double %x) "no-signed-zeros-fp-math" {
+; SROA-LABEL: define double @fabs_fcmp_oge_nsz_func_attr(
+; SROA-SAME: double [[X:%.*]]) #[[ATTR0]] {
+; SROA-NEXT: entry:
+; SROA-NEXT: [[CMP:%.*]] = fcmp nnan nsz oge double [[X]], 0.000000e+00
+; SROA-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; SROA: if.then:
+; SROA-NEXT: br label [[RETURN:%.*]]
+; SROA: if.else:
+; SROA-NEXT: [[FNEG:%.*]] = fneg nnan nsz double [[X]]
+; SROA-NEXT: br label [[RETURN]]
+; SROA: return:
+; SROA-NEXT: [[RETVAL_0:%.*]] = phi nsz double [ [[X]], [[IF_THEN]] ], [ [[FNEG]], [[IF_ELSE]] ]
+; SROA-NEXT: ret double [[RETVAL_0]]
+;
+; FABS-LABEL: define double @fabs_fcmp_oge_nsz_func_attr(
+; FABS-SAME: double [[X:%.*]]) #[[ATTR0]] {
+; FABS-NEXT: entry:
+; FABS-NEXT: [[RETVAL_0:%.*]] = call nnan nsz double @llvm.fabs.f64(double [[X]])
+; FABS-NEXT: ret double [[RETVAL_0]]
+entry:
+ %retval = alloca double
+ %x.addr = alloca double
+ store double %x, ptr %x.addr
+ %0 = load double, ptr %x.addr
+ %cmp = fcmp nnan nsz oge double %0, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %1 = load double, ptr %x.addr
+ store double %1, ptr %retval
+ br label %return
+
+if.else: ; preds = %entry
+ %2 = load double, ptr %x.addr
+ %fneg = fneg nnan nsz double %2
+ store double %fneg, ptr %retval
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %3 = load double, ptr %retval
+ ret double %3
+}
+
+define double @fabs_fcmp_olt_missing_nsz_func_attr(double %x) {
+; SROA-LABEL: define double @fabs_fcmp_olt_missing_nsz_func_attr(
+; SROA-SAME: double [[X:%.*]]) {
+; SROA-NEXT: entry:
+; SROA-NEXT: [[CMP:%.*]] = fcmp nnan nsz olt double [[X]], 0.000000e+00
+; SROA-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; SROA: if.then:
+; SROA-NEXT: [[FNEG:%.*]] = fneg nnan nsz double [[X]]
+; SROA-NEXT: br label [[RETURN:%.*]]
+; SROA: if.else:
+; SROA-NEXT: br label [[RETURN]]
+; SROA: return:
+; SROA-NEXT: [[RETVAL_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ [[X]], [[IF_ELSE]] ]
+; SROA-NEXT: ret double [[RETVAL_0]]
+;
+; FABS-LABEL: define double @fabs_fcmp_olt_missing_nsz_func_attr(
+; FABS-SAME: double [[X:%.*]]) {
+; FABS-NEXT: entry:
+; FABS-NEXT: [[CMP:%.*]] = fcmp nnan nsz olt double [[X]], 0.000000e+00
+; FABS-NEXT: [[FNEG:%.*]] = fneg nnan nsz double [[X]]
+; FABS-NEXT: [[RETVAL_0:%.*]] = select nnan i1 [[CMP]], double [[FNEG]], double [[X]]
+; FABS-NEXT: ret double [[RETVAL_0]]
+entry:
+ %retval = alloca double
+ %x.addr = alloca double
+ store double %x, ptr %x.addr
+ %0 = load double, ptr %x.addr
+ %cmp = fcmp nnan nsz olt double %0, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %1 = load double, ptr %x.addr
+ %fneg = fneg nnan nsz double %1
+ store double %fneg, ptr %retval
+ br label %return
+
+if.else: ; preds = %entry
+ %2 = load double, ptr %x.addr
+ store double %2, ptr %retval
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %3 = load double, ptr %retval
+ ret double %3
+}
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index 88b02a852f3d7..7e380c2e4590a 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -547,20 +547,6 @@ define double @select_fcmp_nnan_nsz_ult_zero_unary_fneg(double %x) {
ret double %fabs
}
-
-define float @absfloat32f_olt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
-; CHECK-LABEL: @absfloat32f_olt_fast_no_signed_zeroes(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT: ret float [[RETVAL_0]]
-;
-entry:
- %cmp = fcmp fast olt float %x, 0.000000e+00
- %fneg = fneg fast float %x
- %retval.0 = select i1 %cmp, float %fneg, float %x
- ret float %retval.0
-}
-
; X < -0.0 ? -X : X --> fabs(X)
define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
@@ -853,19 +839,6 @@ define <2 x float> @select_fcmp_nnan_nsz_ugt_zero_unary_fneg(<2 x float> %x) {
ret <2 x float> %fabs
}
-define float @absfloat32f_ogt_fast_no_signed_zeroes(float %x) "no-signed-zeros-fp-math" {
-; CHECK-LABEL: @absfloat32f_ogt_fast_no_signed_zeroes(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = call nnan ninf float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT: ret float [[RETVAL_0]]
-;
-entry:
- %cmp = fcmp fast ogt float %x, 0.000000e+00
- %fneg = fneg fast float %x
- %retval.0 = select i1 %cmp, float %x, float %fneg
- ret float %retval.0
-}
-
; X > -0.0 ? X : (0.0 - X) --> fabs(X)
define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {
More information about the llvm-commits
mailing list