[llvm] [InstCombine] Generalize fold of `fcmp + copysign` (PR #86387)

Sat Mar 23 02:51:27 PDT 2024

https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/86387

This patch generalizes the fold of `fcmp + copysign`:
```
fcmp pred (copysign C1, X), C2 --> select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)
```

It improves the codegen of the idiom `fcmp oeq (copysign C, X), +/-C`.
Godbolt: https://godbolt.org/z/a8zPheTqT


>From 0a445e83138f44a36708ffdac0e3d26cc9b1d393 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 23 Mar 2024 17:33:25 +0800
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests. NFC.

---
 llvm/test/Transforms/InstCombine/fcmp.ll | 52 ++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f2701d16d0f3d1..73e9a85df2f2f8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -736,6 +736,58 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
   ret i1 %r
 }
 
+define i1 @test_oeq(float %a) {
+; CHECK-LABEL: @test_oeq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[RES]], 1.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp oeq float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_one(float %a) {
+; CHECK-LABEL: @test_one(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one float [[RES]], 1.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp one float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_ogt_false(float %a) {
+; CHECK-LABEL: @test_ogt_false(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[RES]], 2.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp ogt float %res, 2.0
+  ret i1 %cmp
+}
+
+define i1 @test_olt_true(float %a) {
+; CHECK-LABEL: @test_olt_true(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[RES]], 2.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp olt float %res, 2.0
+  ret i1 %cmp
+}
+
 define <2 x i1> @lossy_oeq(<2 x float> %x) {
 ; CHECK-LABEL: @lossy_oeq(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer

>From 25e1ce4dfb9ad27a44875c7ec4aea68cdd4d94ae Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 23 Mar 2024 17:36:40 +0800
Subject: [PATCH 2/2] [InstCombine] Generalize fold of `fcmp + copysign`

---
 .../InstCombine/InstCombineCompares.cpp       | 20 +++----
 llvm/test/Transforms/InstCombine/fcmp.ll      | 56 ++++++++-----------
 2 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..076452200fead5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8101,22 +8101,22 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   }
 
   // Convert a sign-bit test of an FP value into a cast and integer compare.
-  // TODO: Simplify if the copysign constant is 0.0 or NaN.
-  // TODO: Handle non-zero compare constants.
-  // TODO: Handle other predicates.
   if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                            m_Value(X)))) &&
-      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
+      match(Op1, m_ImmConstant(RHSC))) {
     Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
     if (auto *VecTy = dyn_cast<VectorType>(OpType))
       IntType = VectorType::get(IntType, VecTy->getElementCount());
 
-    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
-    if (Pred == FCmpInst::FCMP_OLT) {
-      Value *IntX = Builder.CreateBitCast(X, IntType);
-      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
-                          ConstantInt::getNullValue(IntType));
-    }
+    APFloat PosC = abs(*C);
+    if (Value *CmpPos = ConstantFoldCompareInstOperands(
+            Pred, ConstantFP::get(X->getType(), PosC), RHSC, DL, &TLI, &I))
+      if (Value *CmpNeg = ConstantFoldCompareInstOperands(
+              Pred, ConstantFP::get(X->getType(), -PosC), RHSC, DL, &TLI, &I)) {
+        Value *IntX = Builder.CreateBitCast(X, IntType);
+        Value *NotNeg = Builder.CreateIsNotNeg(IntX);
+        return SelectInst::Create(NotNeg, CmpPos, CmpNeg);
+      }
   }
 
   {
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index 73e9a85df2f2f8..cff533ce5e9cc8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -596,8 +596,8 @@ define i1 @is_signbit_set(double %x) {
 
 define i1 @is_signbit_set_1(double %x) {
 ; CHECK-LABEL: @is_signbit_set_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ult double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -607,8 +607,8 @@ define i1 @is_signbit_set_1(double %x) {
 
 define i1 @is_signbit_set_2(double %x) {
 ; CHECK-LABEL: @is_signbit_set_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ole double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -618,8 +618,8 @@ define i1 @is_signbit_set_2(double %x) {
 
 define i1 @is_signbit_set_3(double %x) {
 ; CHECK-LABEL: @is_signbit_set_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ule double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -640,12 +640,10 @@ define <2 x i1> @is_signbit_set_anyzero(<2 x double> %x) {
   ret <2 x i1> %r
 }
 
-; TODO: Handle different predicates.
-
 define i1 @is_signbit_clear(double %x) {
 ; CHECK-LABEL: @is_signbit_clear(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -655,8 +653,8 @@ define i1 @is_signbit_clear(double %x) {
 
 define i1 @is_signbit_clear_1(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ugt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -666,8 +664,8 @@ define i1 @is_signbit_clear_1(double %x) {
 
 define i1 @is_signbit_clear_2(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp oge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -677,8 +675,8 @@ define i1 @is_signbit_clear_2(double %x) {
 
 define i1 @is_signbit_clear_3(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp uge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -701,12 +699,10 @@ define i1 @is_signbit_set_extra_use(double %x, ptr %p) {
   ret i1 %r
 }
 
-; TODO: Handle non-zero compare constant.
-
 define i1 @is_signbit_clear_nonzero(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_nonzero(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 1.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -714,8 +710,6 @@ define i1 @is_signbit_clear_nonzero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle zero copysign constant.
-
 define i1 @is_signbit_set_simplify_zero(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_zero(
 ; CHECK-NEXT:    ret i1 false
@@ -725,8 +719,6 @@ define i1 @is_signbit_set_simplify_zero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle NaN copysign constant.
-
 define i1 @is_signbit_set_simplify_nan(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_nan(
 ; CHECK-NEXT:    ret i1 false
@@ -739,8 +731,8 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
 define i1 @test_oeq(float %a) {
 ; CHECK-LABEL: @test_oeq(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[RES]], 1.000000e+00
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
 entry:
@@ -752,8 +744,8 @@ entry:
 define i1 @test_one(float %a) {
 ; CHECK-LABEL: @test_one(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp one float [[RES]], 1.000000e+00
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
 entry:
@@ -765,9 +757,7 @@ entry:
 define i1 @test_ogt_false(float %a) {
 ; CHECK-LABEL: @test_ogt_false(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[RES]], 2.000000e+00
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
 entry:
   %res = call float @llvm.copysign.f32(float 1.0, float %a)
@@ -778,9 +768,7 @@ entry:
 define i1 @test_olt_true(float %a) {
 ; CHECK-LABEL: @test_olt_true(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.copysign.f32(float 1.000000e+00, float [[A:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[RES]], 2.000000e+00
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %res = call float @llvm.copysign.f32(float 1.0, float %a)