[llvm] e7f7b63 - [DAGCombiner][X86] Guard `(X & Y) ==/!= Y` --> `(X & Y) !=/== 0` behind TLI preference
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 15 23:59:34 PDT 2023
Author: Noah Goldstein
Date: 2023-08-16T02:00:15-05:00
New Revision: e7f7b63fb3dd7a7d5c972b1f024a41df57c483a2
URL: https://github.com/llvm/llvm-project/commit/e7f7b63fb3dd7a7d5c972b1f024a41df57c483a2
DIFF: https://github.com/llvm/llvm-project/commit/e7f7b63fb3dd7a7d5c972b1f024a41df57c483a2.diff
LOG: [DAGCombiner][X86] Guard `(X & Y) ==/!= Y` --> `(X & Y) !=/== 0` behind TLI preference
On X86 for vec types `(X & Y) == Y` is generally preferable to
`(X & Y) != 0`. Creating zero requires an extra instruction and on
pre-avx512 targets there is no vector `pcmpne` so it requires two
additional instructions to invert the `pcmpeq`.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D157014
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/known-pow2.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index ce12abe952c9a5..449ca2a1960486 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5308,6 +5308,11 @@ class TargetLowering : public TargetLoweringBase {
// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
+ // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`
+ virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
+ return true;
+ }
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2b44b57ab2a32c..033d1a5c3954de 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3852,8 +3852,12 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+ // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
+ // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
+ // its liable to create and infinite loop.
SDValue Zero = DAG.getConstant(0, DL, OpVT);
- if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
+ DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 559500df90242e..85ccf62fe58f0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22423,6 +22423,11 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return Sub.getValue(1);
}
+bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
+ EVT VT) const {
+ return !VT.isVector() || Cond != ISD::CondCode::SETEQ;
+}
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b3b00dbf3c07da..bba457239547cf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1057,6 +1057,9 @@ namespace llvm {
bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
EVT ExtVT) const override;
+ bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
+ EVT VT) const override;
+
/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index c7465233711d88..e183bbc15617d5 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -619,11 +619,8 @@ define <4 x i1> @pow2_vselect_eq(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y, <4 x i
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pandn %xmm7, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pand %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%zz = lshr <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>, %z
More information about the llvm-commits mailing list