[llvm] 42e11a6 - Add transform (and/or (icmp eq/ne (A, C)), (icmp eq/ne (A, -C))) -> (icmp eq/ne (ABS A), ABS(C))
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 16:59:26 PST 2023
- Previous message: [llvm] abf6692 - Tests for (and/or (icmp eq/ne A, C), (icmp eq/ne A, -C)) <--> (icmp eq/ne (ABS A), ABS(C)); NFC
- Next message: [PATCH] D142344: [DAGCombiner] Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (and A,Pow2),~(Pow2*2)), 0)`
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Author: Noah Goldstein
Date: 2023-02-14T18:59:04-06:00
New Revision: 42e11a6ea32174b322d26756450793f1d8405f08
URL: https://github.com/llvm/llvm-project/commit/42e11a6ea32174b322d26756450793f1d8405f08
DIFF: https://github.com/llvm/llvm-project/commit/42e11a6ea32174b322d26756450793f1d8405f08.diff
LOG: Add transform (and/or (icmp eq/ne (A, C)), (icmp eq/ne (A, -C))) -> (icmp eq/ne (ABS A), ABS(C))
This can be beneficial if there is a fast `ABS` (For example with X86
`vpabs`) or if there is a dominating ABS(A) in the `DAG`.
Note `C` is constant so `ABS(C)` is just a constant.
Alive2 Links:
EQ: https://alive2.llvm.org/ce/z/829F-c
NE: https://alive2.llvm.org/ce/z/tsS8bU
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D142601
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
llvm/test/CodeGen/X86/icmp-abs-C.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f6fb97df778a..e302ab931a40 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -282,6 +282,14 @@ class TargetLoweringBase {
Expensive = 2 // Negated expression is more expensive.
};
+ /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
+ /// (setcc ...)).
+ enum class AndOrSETCCFoldKind {
+ None,
+ AddAnd,
+ ABS,
+ };
+
class ArgListEntry {
public:
Value *Val = nullptr;
@@ -4002,21 +4010,27 @@ class TargetLowering : public TargetLoweringBase {
return true;
}
- // Return true if its desirable to try and optimize LogicOp(SETCC0, SETCC1).
- // An example (what is implemented as of writing this) is:
+ // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
+ // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
+ // writing this) is:
// With C as a power of 2 and C != 0 and C != INT_MIN:
- // (icmp eq A, C) | (icmp eq A, -C)
+ // AddAnd:
+ // (icmp eq A, C) | (icmp eq A, -C)
// -> (icmp eq and(add(A, C), ~(C + C)), 0)
// (icmp ne A, C) & (icmp ne A, -C)
// -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ // ABS:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq Abs(A), C)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne Abs(A), C)
//
// @param LogicOp the logic op
// @param SETCC0 the first of the SETCC nodes
// @param SETCC1 the second of the SETCC nodes
- virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
- const SDNode *SETCC0,
- const SDNode *SETCC1) const {
- return false;
+ virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
+ const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+ return AndOrSETCCFoldKind::None;
}
/// Return true if it is profitable to combine an XOR of a logical shift
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 742686ea2f40..8862aefd774e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5867,6 +5867,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
}
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
assert(
(LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
"Invalid Op to combine SETCC with");
@@ -5878,8 +5879,10 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, LHS.getNode(),
- RHS.getNode()))
+ AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
+ LogicOp, LHS.getNode(), RHS.getNode());
+
+ if (TargetPreference == AndOrSETCCFoldKind::None)
return SDValue();
ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
@@ -5890,35 +5893,55 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
SDValue LHS1 = LHS->getOperand(1);
SDValue RHS1 = RHS->getOperand(1);
- auto *LHS1C = dyn_cast<ConstantSDNode>(LHS1);
- auto *RHS1C = dyn_cast<ConstantSDNode>(RHS1);
+ // TODO: We don't actually need a splat here, for vectors we just need
+ // LaneLHS[N] == -LaneRHS[N];
+ auto *LHS1C = isConstOrConstSplat(LHS1);
+ auto *RHS1C = isConstOrConstSplat(RHS1);
+
EVT VT = LogicOp->getValueType(0);
EVT OpVT = LHS0.getValueType();
SDLoc DL(LogicOp);
- // With C as a power of 2 and C != 0 and C != INT_MIN:
- // (icmp eq A, C) | (icmp eq A, -C)
- // -> (icmp eq and(add(A, C), ~(C + C)), 0)
- // (icmp ne A, C) & (icmp ne A, -C)w
- // -> (icmp ne and(add(A, C), ~(C + C)), 0)
if (CCL == CCR &&
CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
RHS.hasOneUse() && LHS1C->getAPIntValue() == (-RHS1C->getAPIntValue())) {
- const ConstantSDNode *Pow2 = nullptr;
- if (LHS1C->getAPIntValue().isPowerOf2())
- Pow2 = LHS1C;
- else if (RHS1C->getAPIntValue().isPowerOf2())
- Pow2 = RHS1C;
- // isPowerOf2 is only for non-zero powers of 2.
- if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
- const APInt &C = Pow2->getAPIntValue();
- SDValue AddOp =
- DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
- SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
- DAG.getConstant(~(C + C), DL, OpVT));
- return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
- DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+
+ // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
+ // case this is just a compare).
+ if (TargetPreference == AndOrSETCCFoldKind::ABS ||
+ DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0})) {
+ APInt C = LHS1C->getAPIntValue();
+ if (C.isNegative())
+ C = RHS1C->getAPIntValue();
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq Abs(A), C)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne Abs(A), C)
+ SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
+ return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
+ DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference == AndOrSETCCFoldKind::AddAnd) {
+ // With C as a power of 2 and C != 0 and C != INT_MIN:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq and(add(A, C), ~(C + C)), 0)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ const ConstantSDNode *Pow2 = nullptr;
+ if (LHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = LHS1C;
+ else if (RHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = RHS1C;
+ // isPowerOf2 is only for non-zero powers of 2.
+ if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
+ const APInt &C = Pow2->getAPIntValue();
+ SDValue AddOp =
+ DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~(C + C), DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index db6dd99fa0fd..25054b769482 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57147,10 +57147,18 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
-bool X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
+TargetLowering::AndOrSETCCFoldKind
+X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
EVT VT = LogicOp->getValueType(0);
- return VT.isScalarInteger();
+ EVT OpVT = SETCC0->getOperand(0).getValueType();
+ if (!VT.isInteger())
+ return AndOrSETCCFoldKind::None;
+ if (VT.isVector())
+ return isOperationLegal(ISD::ABS, OpVT) ? AndOrSETCCFoldKind::ABS
+ : AndOrSETCCFoldKind::None;
+ return AndOrSETCCFoldKind::AddAnd;
}
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index cb46f48d14be..454ac829a99b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1058,8 +1058,9 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
- /// Return true if this is operating on scalar integers.
- bool
+ /// Return preferred fold type, ABS if this is a vector, AddAnd if it's a
+ /// scalar integer, None otherwise.
+ TargetLowering::AndOrSETCCFoldKind
isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
const SDNode *SETCC0,
const SDNode *SETCC1) const override;
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 90e6e5becd49..f04aa2cd9ba3 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -510,9 +510,8 @@ define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) {
define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
; AVX512-LABEL: eq_or_to_abs_vec4x64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
-; AVX512-NEXT: korw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper
@@ -572,11 +571,9 @@ define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
; AVX512-LABEL: eq_or_to_abs_vec4x64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
-; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
-; AVX512-NEXT: korw %k1, %k0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: eq_or_to_abs_vec4x64_sext:
@@ -644,8 +641,8 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; AVX512-LABEL: ne_and_to_abs_vec4x64:
; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
-; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper
@@ -713,10 +710,10 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; AVX512-LABEL: ne_and_to_abs_vec4x64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
-; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: ne_and_to_abs_vec4x64_sext:
@@ -790,29 +787,22 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) {
; AVX512-LABEL: eq_or_to_abs_vec4x32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
-; AVX512-NEXT: korw %k1, %k0, %k1
-; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: eq_or_to_abs_vec4x32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: eq_or_to_abs_vec4x32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: eq_or_to_abs_vec4x32:
@@ -832,29 +822,22 @@ define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) {
define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) {
; AVX512-LABEL: eq_or_to_abs_vec4x32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
-; AVX512-NEXT: korw %k1, %k0, %k1
-; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: eq_or_to_abs_vec4x32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: eq_or_to_abs_vec4x32_sext:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: eq_or_to_abs_vec4x32_sext:
@@ -875,31 +858,27 @@ define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) {
define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
; AVX512-LABEL: ne_and_to_abs_vec4x32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
-; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: ne_and_to_abs_vec4x32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pandn %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x32:
@@ -921,31 +900,27 @@ define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
; AVX512-LABEL: ne_and_to_abs_vec4x32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
-; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: ne_and_to_abs_vec4x32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x32_sext:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pandn %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x32_sext:
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C.ll b/llvm/test/CodeGen/X86/icmp-abs-C.ll
index 842868d46da4..53b70fa38958 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C.ll
@@ -13,32 +13,29 @@ define i64 @eq_or_with_dom_abs(i64 %x) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: xorl %edi, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: subl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: xorl $12312, %eax # imm = 0x3018
-; X86-NEXT: addl $64, %ecx
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: andl $-129, %ecx
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: sete %cl
+; X86-NEXT: xorl $64, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: sete %bl
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: movl $2344, %edi # imm = 0x928
; X86-NEXT: cmpl %eax, %edi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: jb .LBB0_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %cl, %bl
+; X86-NEXT: movb %bl, %cl
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB0_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -50,13 +47,13 @@ define i64 @eq_or_with_dom_abs(i64 %x) nounwind {
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: negq %rcx
; X64-NEXT: cmovsq %rdi, %rcx
-; X64-NEXT: xorq $12312, %rcx # imm = 0x3018
-; X64-NEXT: addq $64, %rdi
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: xorq $12312, %rdx # imm = 0x3018
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testq $-129, %rdi
+; X64-NEXT: cmpq $64, %rcx
; X64-NEXT: sete %al
-; X64-NEXT: cmpq $2345, %rcx # imm = 0x929
-; X64-NEXT: cmovaeq %rcx, %rax
+; X64-NEXT: cmpq $2345, %rdx # imm = 0x929
+; X64-NEXT: cmovaeq %rdx, %rax
; X64-NEXT: retq
%absx = call i64 @llvm.abs.i64(i64 %x, i1 true)
%foo = xor i64 %absx, 12312
@@ -73,21 +70,20 @@ define i32 @eq_or_with_dom_abs_non_po2(i32 %x) nounwind {
; X86-LABEL: eq_or_with_dom_abs_non_po2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %edx, %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl $12312, %eax # imm = 0x3018
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl $123, %edx
-; X86-NEXT: sete %cl
-; X86-NEXT: cmpl $-123, %edx
; X86-NEXT: sete %dl
; X86-NEXT: cmpl $2345, %eax # imm = 0x929
; X86-NEXT: jae .LBB1_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: orb %dl, %cl
-; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: movb %dl, %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB1_2:
; X86-NEXT: retl
;
@@ -96,15 +92,13 @@ define i32 @eq_or_with_dom_abs_non_po2(i32 %x) nounwind {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: negl %ecx
; X64-NEXT: cmovsl %edi, %ecx
-; X64-NEXT: xorl $12312, %ecx # imm = 0x3018
-; X64-NEXT: cmpl $123, %edi
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: xorl $12312, %edx # imm = 0x3018
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $123, %ecx
; X64-NEXT: sete %al
-; X64-NEXT: cmpl $-123, %edi
-; X64-NEXT: sete %dl
-; X64-NEXT: orb %al, %dl
-; X64-NEXT: cmpl $2345, %ecx # imm = 0x929
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: cmpl $2345, %edx # imm = 0x929
+; X64-NEXT: cmovael %edx, %eax
; X64-NEXT: retq
%absx = call i32 @llvm.abs.i32(i32 %x, i1 true)
%foo = xor i32 %absx, 12312
@@ -120,21 +114,18 @@ define i32 @eq_or_with_dom_abs_non_po2(i32 %x) nounwind {
define i8 @ne_and_with_dom_abs_non_pow2(i8 %x) nounwind {
; X86-LABEL: ne_and_with_dom_abs_non_pow2:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sarb $7, %cl
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: xorb %cl, %al
-; X86-NEXT: subb %cl, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarb $7, %al
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorb $12, %al
-; X86-NEXT: cmpb $121, %dl
+; X86-NEXT: cmpb $121, %cl
; X86-NEXT: setne %cl
-; X86-NEXT: cmpb $-121, %dl
-; X86-NEXT: setne %dl
; X86-NEXT: cmpb $24, %al
; X86-NEXT: jae .LBB2_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: andb %dl, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB2_2:
; X86-NEXT: retl
@@ -143,19 +134,16 @@ define i8 @ne_and_with_dom_abs_non_pow2(i8 %x) nounwind {
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: sarb $7, %al
+; X64-NEXT: xorb %al, %dil
+; X64-NEXT: subb %al, %dil
; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: xorb %al, %cl
-; X64-NEXT: subb %al, %cl
; X64-NEXT: xorb $12, %cl
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb $121, %dil
; X64-NEXT: setne %al
-; X64-NEXT: cmpb $-121, %dil
-; X64-NEXT: setne %dl
-; X64-NEXT: andb %al, %dl
; X64-NEXT: cmpb $24, %cl
-; X64-NEXT: movzbl %dl, %edx
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: cmovbl %edx, %eax
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%absx = call i8 @llvm.abs.i8(i8 %x, i1 true)
@@ -173,17 +161,16 @@ define i16 @ne_and_with_dom_abs(i16 %x) nounwind {
; X86-LABEL: ne_and_with_dom_abs:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movswl %cx, %edx
-; X86-NEXT: sarl $15, %edx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movswl %cx, %eax
+; X86-NEXT: sarl $15, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: subl %edx, %eax
; X86-NEXT: xorl $12312, %eax # imm = 0x3018
; X86-NEXT: movzwl %ax, %esi
-; X86-NEXT: addl $64, %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: testl $65407, %ecx # imm = 0xFF7F
+; X86-NEXT: cmpw $64, %cx
; X86-NEXT: setne %cl
; X86-NEXT: cmpl $2345, %esi # imm = 0x929
; X86-NEXT: jae .LBB3_2
@@ -200,14 +187,14 @@ define i16 @ne_and_with_dom_abs(i16 %x) nounwind {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: negw %cx
; X64-NEXT: cmovsw %di, %cx
-; X64-NEXT: xorl $12312, %ecx # imm = 0x3018
-; X64-NEXT: movzwl %cx, %edx
-; X64-NEXT: addl $64, %edi
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: xorl $12312, %edx # imm = 0x3018
+; X64-NEXT: movzwl %dx, %esi
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl $65407, %edi # imm = 0xFF7F
+; X64-NEXT: cmpw $64, %cx
; X64-NEXT: setne %al
-; X64-NEXT: cmpl $2345, %edx # imm = 0x929
-; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: cmpl $2345, %esi # imm = 0x929
+; X64-NEXT: cmovael %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%absx = call i16 @llvm.abs.i16(i16 %x, i1 true)
- Previous message: [llvm] abf6692 - Tests for (and/or (icmp eq/ne A, C), (icmp eq/ne A, -C)) <--> (icmp eq/ne (ABS A), ABS(C)); NFC
- Next message: [PATCH] D142344: [DAGCombiner] Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (and A,Pow2),~(Pow2*2)), 0)`
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the llvm-commits
mailing list