[llvm] [X86] SimplifyDemandedBitsForTargetNode - add handling for X86ISD::FAND/FOR (PR #136618)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 21 14:36:34 PDT 2025


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/136618

Also add computeKnownBitsForTargetNode handling for X86ISD::FAND

Fixes #136368

>From bd728b833673f94a7c4b946418802becc3819ebd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 21 Apr 2025 22:35:30 +0100
Subject: [PATCH] [X86] SimplifyDemandedBitsForTargetNode - add handling for
 X86ISD::FAND/FOR

Also add computeKnownBitsForTargetNode handling for X86ISD::FAND

Fixes #136368
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    | 57 +++++++++++++++++++++-
 llvm/test/CodeGen/X86/combine-fcopysign.ll | 30 +++++-------
 2 files changed, 68 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 993118c52564e..5e46708c7e877 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38487,11 +38487,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     Known.Zero |= Known2.One;
     break;
   }
+  case X86ISD::FAND: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known &= Known2;
+    break;
+  }
   case X86ISD::FOR: {
     KnownBits Known2;
     Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
     Known |= Known2;
     break;
   }
@@ -44147,6 +44153,55 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     Known.Zero |= Known2.One;
     break;
   }
+  case X86ISD::FAND: {
+    KnownBits Known2;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+                             Known, TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
+                             OriginalDemandedElts, Known2, TLO, Depth + 1))
+      return true;
+
+    // If every demanded bit is known zero in one operand or known one in the
+    // other, the 'and' equals the first operand on those bits - return it.
+    if (OriginalDemandedBits.isSubsetOf(Known2.Zero | Known.One))
+      return TLO.CombineTo(Op, Op0);
+    if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.One))
+      return TLO.CombineTo(Op, Op1);
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+      return TLO.CombineTo(Op, TLO.DAG.getConstantFP(0.0, SDLoc(Op), VT));
+
+    Known &= Known2;
+    break;
+  }
+  case X86ISD::FOR: {
+    KnownBits Known2;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+                             Known, TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op0, ~Known.One & OriginalDemandedBits,
+                             OriginalDemandedElts, Known2, TLO, Depth + 1))
+      return true;
+
+    // If every demanded bit is known one in one operand or known zero in the
+    // other, the 'or' equals the first operand on those bits - return it.
+    if (OriginalDemandedBits.isSubsetOf(Known2.One | Known.Zero))
+      return TLO.CombineTo(Op, Op0);
+    if (OriginalDemandedBits.isSubsetOf(Known.One | Known2.Zero))
+      return TLO.CombineTo(Op, Op1);
+
+    Known |= Known2;
+    break;
+  }
   case X86ISD::VSHLI: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index d7031be3addd9..0443d3aee3801 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -252,28 +252,22 @@ define double @PR136368(double %x) {
 ; SSE-LABEL: PR136368:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movapd {{.*#+}} xmm1 = [NaN,NaN]
-; SSE-NEXT:    movapd %xmm0, %xmm2
-; SSE-NEXT:    andpd %xmm1, %xmm2
-; SSE-NEXT:    movsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; SSE-NEXT:    movapd %xmm3, %xmm4
-; SSE-NEXT:    cmpltsd %xmm2, %xmm4
-; SSE-NEXT:    andpd %xmm3, %xmm4
-; SSE-NEXT:    andpd %xmm1, %xmm4
-; SSE-NEXT:    andnpd %xmm0, %xmm1
-; SSE-NEXT:    orpd %xmm4, %xmm1
-; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    andpd %xmm0, %xmm1
+; SSE-NEXT:    movsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; SSE-NEXT:    movapd %xmm2, %xmm3
+; SSE-NEXT:    cmpltsd %xmm1, %xmm3
+; SSE-NEXT:    andpd %xmm2, %xmm3
+; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    orpd %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR136368:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [NaN,NaN]
-; AVX-NEXT:    # xmm1 = mem[0,0]
-; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; AVX-NEXT:    vcmpltsd %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vandpd %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vandnpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vandpd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; AVX-NEXT:    vcmpltsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %fabs = tail call double @llvm.fabs.f64(double %x)



More information about the llvm-commits mailing list