[llvm] [InstCombine] Generalize fold of `fcmp + copysign` (PR #86387)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 23 02:51:27 PDT 2024
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/86387
This patch generalizes the fold of `fcmp + copysign`:
```
fcmp pred (copysign C1, X), C2 --> select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)
```
It improves the codegen of the idiom `fcmp oeq (copysign C, X), +/-C`.
Godbolt: https://godbolt.org/z/a8zPheTqT
>From 0a445e83138f44a36708ffdac0e3d26cc9b1d393 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 23 Mar 2024 17:33:25 +0800
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests. NFC.
---
llvm/test/Transforms/InstCombine/fcmp.ll | 52 ++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f2701d16d0f3d1..73e9a85df2f2f8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -736,6 +736,58 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
ret i1 %r
}
+define i1 @test_oeq(float %a) {
+; CHECK-LABEL: @test_oeq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[RES]], 1.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp oeq float %res, 1.0
+ ret i1 %cmp
+}
+
+define i1 @test_one(float %a) {
+; CHECK-LABEL: @test_one(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[RES]], 1.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp one float %res, 1.0
+ ret i1 %cmp
+}
+
+define i1 @test_ogt_false(float %a) {
+; CHECK-LABEL: @test_ogt_false(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[RES]], 2.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp ogt float %res, 2.0
+ ret i1 %cmp
+}
+
+define i1 @test_olt_true(float %a) {
+; CHECK-LABEL: @test_olt_true(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[RES]], 2.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp olt float %res, 2.0
+ ret i1 %cmp
+}
+
define <2 x i1> @lossy_oeq(<2 x float> %x) {
; CHECK-LABEL: @lossy_oeq(
; CHECK-NEXT: ret <2 x i1> zeroinitializer
>From 25e1ce4dfb9ad27a44875c7ec4aea68cdd4d94ae Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 23 Mar 2024 17:36:40 +0800
Subject: [PATCH 2/2] [InstCombine] Generalize fold of `fcmp + copysign`
---
.../InstCombine/InstCombineCompares.cpp | 20 +++----
llvm/test/Transforms/InstCombine/fcmp.ll | 56 ++++++++-----------
2 files changed, 32 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..076452200fead5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8101,22 +8101,22 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
}
// Convert a sign-bit test of an FP value into a cast and integer compare.
- // TODO: Simplify if the copysign constant is 0.0 or NaN.
- // TODO: Handle non-zero compare constants.
- // TODO: Handle other predicates.
if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
m_Value(X)))) &&
- match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
+ match(Op1, m_ImmConstant(RHSC))) {
Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
if (auto *VecTy = dyn_cast<VectorType>(OpType))
IntType = VectorType::get(IntType, VecTy->getElementCount());
- // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
- if (Pred == FCmpInst::FCMP_OLT) {
- Value *IntX = Builder.CreateBitCast(X, IntType);
- return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
- ConstantInt::getNullValue(IntType));
- }
+ APFloat PosC = abs(*C);
+ if (Value *CmpPos = ConstantFoldCompareInstOperands(
+ Pred, ConstantFP::get(X->getType(), PosC), RHSC, DL, &TLI, &I))
+ if (Value *CmpNeg = ConstantFoldCompareInstOperands(
+ Pred, ConstantFP::get(X->getType(), -PosC), RHSC, DL, &TLI, &I)) {
+ Value *IntX = Builder.CreateBitCast(X, IntType);
+ Value *NotNeg = Builder.CreateIsNotNeg(IntX);
+ return SelectInst::Create(NotNeg, CmpPos, CmpNeg);
+ }
}
{
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index 73e9a85df2f2f8..cff533ce5e9cc8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -596,8 +596,8 @@ define i1 @is_signbit_set(double %x) {
define i1 @is_signbit_set_1(double %x) {
; CHECK-LABEL: @is_signbit_set_1(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ult double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -607,8 +607,8 @@ define i1 @is_signbit_set_1(double %x) {
define i1 @is_signbit_set_2(double %x) {
; CHECK-LABEL: @is_signbit_set_2(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ole double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -618,8 +618,8 @@ define i1 @is_signbit_set_2(double %x) {
define i1 @is_signbit_set_3(double %x) {
; CHECK-LABEL: @is_signbit_set_3(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ule double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -640,12 +640,10 @@ define <2 x i1> @is_signbit_set_anyzero(<2 x double> %x) {
ret <2 x i1> %r
}
-; TODO: Handle different predicates.
-
define i1 @is_signbit_clear(double %x) {
; CHECK-LABEL: @is_signbit_clear(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ogt double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -655,8 +653,8 @@ define i1 @is_signbit_clear(double %x) {
define i1 @is_signbit_clear_1(double %x) {
; CHECK-LABEL: @is_signbit_clear_1(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ugt double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -666,8 +664,8 @@ define i1 @is_signbit_clear_1(double %x) {
define i1 @is_signbit_clear_2(double %x) {
; CHECK-LABEL: @is_signbit_clear_2(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp oge double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -677,8 +675,8 @@ define i1 @is_signbit_clear_2(double %x) {
define i1 @is_signbit_clear_3(double %x) {
; CHECK-LABEL: @is_signbit_clear_3(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp uge double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -701,12 +699,10 @@ define i1 @is_signbit_set_extra_use(double %x, ptr %p) {
ret i1 %r
}
-; TODO: Handle non-zero compare constant.
-
define i1 @is_signbit_clear_nonzero(double %x) {
; CHECK-LABEL: @is_signbit_clear_nonzero(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ogt double [[S]], 1.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -714,8 +710,6 @@ define i1 @is_signbit_clear_nonzero(double %x) {
ret i1 %r
}
-; TODO: Handle zero copysign constant.
-
define i1 @is_signbit_set_simplify_zero(double %x) {
; CHECK-LABEL: @is_signbit_set_simplify_zero(
; CHECK-NEXT: ret i1 false
@@ -725,8 +719,6 @@ define i1 @is_signbit_set_simplify_zero(double %x) {
ret i1 %r
}
-; TODO: Handle NaN copysign constant.
-
define i1 @is_signbit_set_simplify_nan(double %x) {
; CHECK-LABEL: @is_signbit_set_simplify_nan(
; CHECK-NEXT: ret i1 false
@@ -739,8 +731,8 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
define i1 @test_oeq(float %a) {
; CHECK-LABEL: @test_oeq(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[RES]], 1.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], -1
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
@@ -752,8 +744,8 @@ entry:
define i1 @test_one(float %a) {
; CHECK-LABEL: @test_one(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[RES]], 1.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
@@ -765,9 +757,7 @@ entry:
define i1 @test_ogt_false(float %a) {
; CHECK-LABEL: @test_ogt_false(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[RES]], 2.000000e+00
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
entry:
%res = call float @llvm.copysign.f32(float 1.0, float %a)
@@ -778,9 +768,7 @@ entry:
define i1 @test_olt_true(float %a) {
; CHECK-LABEL: @test_olt_true(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[RES]], 2.000000e+00
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
entry:
%res = call float @llvm.copysign.f32(float 1.0, float %a)
More information about the llvm-commits mailing list