[llvm] [X86] SimplifyDemandedBitsForTargetNode - add handling for X86ISD::FAND/FOR (PR #136618)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 21 14:36:34 PDT 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/136618
Also add computeKnownBitsForTargetNode handling for X86ISD::FAND, mirroring the existing X86ISD::FOR handling.
Fixes #136368
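
For reference, here is a minimal standalone sketch of the known-bits rules the patch relies on. It models llvm::KnownBits with plain uint64_t masks (it is not the actual LLVM API) and shows how known-zero/known-one information combines through a bitwise AND/OR, plus the check that lets one AND operand be dropped for the demanded bits; the fabs-style sign mask in main() is illustrative only, not taken from the test:

#include <cassert>
#include <cstdint>

// Stand-in for llvm::KnownBits: a bit may be known zero, known one, or
// unknown (neither mask set). Zero and One must never overlap.
struct Known {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
};

// Known bits of (a & b): a result bit is known 0 if either input bit is
// known 0, and known 1 only if both input bits are known 1.
Known knownAnd(Known A, Known B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

// Known bits of (a | b): a result bit is known 1 if either input bit is
// known 1, and known 0 only if both input bits are known 0.
Known knownOr(Known A, Known B) {
  return {A.Zero & B.Zero, A.One | B.One};
}

// For AND, operand B is redundant for the demanded bits if every demanded
// bit is either known 1 in B (the AND passes A through there) or known 0
// in A (the result bit is 0 regardless, matching A). This is the bit-level
// idea behind the patch's OriginalDemandedBits.isSubsetOf(...) checks.
bool andOperandBIsRedundant(uint64_t Demanded, Known A, Known B) {
  return (Demanded & ~(A.Zero | B.One)) == 0;
}

int main() {
  // Illustrative values: B is an all-ones-except-sign mask (as used for
  // fabs on a double) and nothing is known about A. If the sign bit is not
  // demanded, the AND with the mask is a no-op on the demanded bits.
  Known Val{0, 0};                           // nothing known about the value
  Known Mask{1ull << 63, ~(1ull << 63)};     // sign bit 0, all other bits 1
  uint64_t DemandedNonSign = ~(1ull << 63);  // sign bit not demanded
  assert(andOperandBIsRedundant(DemandedNonSign, Val, Mask));

  Known A = knownAnd(Val, Mask);
  assert(A.Zero == (1ull << 63));            // sign bit of the result is known 0

  Known O = knownOr(Val, Mask);
  assert(O.One == ~(1ull << 63));            // non-sign bits of the result are known 1
  return 0;
}
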
From bd728b833673f94a7c4b946418802becc3819ebd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 21 Apr 2025 22:35:30 +0100
Subject: [PATCH] [X86] SimplifyDemandedBitsForTargetNode - add handling for
X86ISD::FAND/FOR
Also add computeKnownBitsForTargetNode handling for X86ISD::FAND
Fixes #136368
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++++++++++++-
llvm/test/CodeGen/X86/combine-fcopysign.ll | 30 +++++-------
2 files changed, 68 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 993118c52564e..5e46708c7e877 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38487,11 +38487,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero |= Known2.One;
break;
}
+ case X86ISD::FAND: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known &= Known2;
+ break;
+ }
case X86ISD::FOR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
Known |= Known2;
break;
}
@@ -44147,6 +44153,55 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.Zero |= Known2.One;
break;
}
+ case X86ISD::FAND: {
+ KnownBits Known2;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
+ OriginalDemandedElts, Known2, TLO, Depth + 1))
+ return true;
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if (OriginalDemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getConstantFP(0.0, SDLoc(Op), VT));
+
+ Known &= Known2;
+ break;
+ }
+ case X86ISD::FOR: {
+ KnownBits Known2;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op0, ~Known.One & OriginalDemandedBits,
+ OriginalDemandedElts, Known2, TLO, Depth + 1))
+ return true;
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (OriginalDemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (OriginalDemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+
+ Known |= Known2;
+ break;
+ }
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index d7031be3addd9..0443d3aee3801 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -252,28 +252,22 @@ define double @PR136368(double %x) {
; SSE-LABEL: PR136368:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{.*#+}} xmm1 = [NaN,NaN]
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: movsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; SSE-NEXT: movapd %xmm3, %xmm4
-; SSE-NEXT: cmpltsd %xmm2, %xmm4
-; SSE-NEXT: andpd %xmm3, %xmm4
-; SSE-NEXT: andpd %xmm1, %xmm4
-; SSE-NEXT: andnpd %xmm0, %xmm1
-; SSE-NEXT: orpd %xmm4, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm1
+; SSE-NEXT: movsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: cmpltsd %xmm1, %xmm3
+; SSE-NEXT: andpd %xmm2, %xmm3
+; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: orpd %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: PR136368:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
-; AVX-NEXT: # xmm1 = mem[0,0]
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; AVX-NEXT: vcmpltsd %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vandpd %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX-NEXT: vmovsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; AVX-NEXT: vcmpltsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%fabs = tail call double @llvm.fabs.f64(double %x)