[llvm] 3521ecf - [X86] Add vector support to targetShrinkDemandedConstant for OR/XOR opcodes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 29 04:29:58 PDT 2020
Author: Simon Pilgrim
Date: 2020-06-29T12:19:05+01:00
New Revision: 3521ecf1f8a3cf5e4811f93a9a809fc722462bbf
URL: https://github.com/llvm/llvm-project/commit/3521ecf1f8a3cf5e4811f93a9a809fc722462bbf
DIFF: https://github.com/llvm/llvm-project/commit/3521ecf1f8a3cf5e4811f93a9a809fc722462bbf.diff
LOG: [X86] Add vector support to targetShrinkDemandedConstant for OR/XOR opcodes
If a constant is all sign bits within its demanded/active bits, then sign extend it to an all-sign-bits boolean pattern for OR/XOR ops.
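To make the "all sign bits within the demanded bits" condition concrete, here is a minimal standalone sketch using llvm::APInt (illustration only, not code from the patch), mirroring the NeedsSignExtension check for a 64-bit element with value 1 when only the low bit is demanded:

    #include "llvm/ADT/APInt.h"
    #include <cstdio>

    int main() {
      // A v4i64-style element with value 1, of which only bit 0 is demanded.
      llvm::APInt Elt(64, 1);
      unsigned ActiveBits = 1; // DemandedBits.getActiveBits() for a 0x1 mask

      // The element is not already all sign bits (63 sign bits out of 64)...
      bool NotAllSignBits = Elt.getBitWidth() > Elt.getNumSignBits();
      // ...but restricted to the active bits it is, so it can be sign extended
      // into an all-sign-bits boolean element without changing the demanded bits.
      bool AllSignBitsWhenTruncated =
          Elt.trunc(ActiveBits).getNumSignBits() == ActiveBits;

      std::printf("needs sign extension: %d\n",
                  NotAllSignBits && AllSignBitsWhenTruncated); // prints 1
      return 0;
    }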
This also requires SimplifyDemandedBits XOR handling to be modified to call ShrinkDemandedConstant on any (non-NOT) XOR pattern to account for non-splat cases.
Next step towards fixing PR45808 - with this patch we now get a <-1,-1,0,0> v4i64 constant instead of <1,1,0,0>.
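As a rough illustration of how a <1,1,0,0> constant becomes <-1,-1,0,0> (a hedged sketch of the per-element sign extension from the i1 boolean position, not the actual DAG transform):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Sign extending each element from its low i1 bit mirrors the effect of
      // the SIGN_EXTEND_INREG(..., vXi1) node the new code emits.
      const uint64_t Elts[4] = {1, 1, 0, 0};
      for (uint64_t E : Elts) {
        llvm::APInt Ext = llvm::APInt(64, E).trunc(1).sext(64);
        std::printf("%lld ", (long long)Ext.getSExtValue());
      }
      std::printf("\n"); // prints: -1 -1 0 0
      return 0;
    }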
Differential Revision: https://reviews.llvm.org/D82257
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/promote-cmp.ll
llvm/test/CodeGen/X86/setcc-lowering.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f6e34e87fed0..d81582d4dd04 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1330,7 +1330,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
- if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
+ if (C) {
// If one side is a constant, and all of the known set bits on the other
// side are also set in the constant, turn this into an AND, as we know
// the bits will be cleared.
@@ -1345,18 +1346,19 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue()) {
- if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
- return TLO.CombineTo(Op, New);
- }
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
- return true;
+ if (!C->isAllOnesValue() &&
+ DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
}
}
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (!C || !C->isAllOnesValue())
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
Known ^= Known2;
break;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2e570e0e5a37..aa0988b28498 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33221,18 +33221,49 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
- // Only optimize Ands to prevent shrinking a constant that could be
- // matched by movzx.
- if (Op.getOpcode() != ISD::AND)
- return false;
-
EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
+ unsigned EltSize = VT.getScalarSizeInBits();
- // Ignore vectors.
- if (VT.isVector())
+ if (VT.isVector()) {
+ // If the constant is only all signbits in the active bits, then we should
+ // extend it to the entire constant to allow it act as a boolean constant
+ // vector.
+ auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) {
+ if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode()))
+ return false;
+ for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i] || V.getOperand(i).isUndef())
+ continue;
+ const APInt &Val = V.getConstantOperandAPInt(i);
+ if (Val.getBitWidth() > Val.getNumSignBits() &&
+ Val.trunc(ActiveBits).getNumSignBits() == ActiveBits)
+ return true;
+ }
+ return false;
+ };
+ // For vectors - if we have a constant, then try to sign extend.
+ // TODO: Handle AND/ANDN cases.
+ unsigned ActiveBits = DemandedBits.getActiveBits();
+ if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
+ (Opcode == ISD::OR || Opcode == ISD::XOR) &&
+ NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
+ EVT BoolVT = EVT::getVectorVT(*TLO.DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ SDValue NewC =
+ TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
+ Op.getOperand(1), TLO.DAG.getValueType(BoolVT));
+ SDValue NewOp =
+ TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ }
return false;
+ }
- unsigned Size = VT.getSizeInBits();
+ // Only optimize Ands to prevent shrinking a constant that could be
+ // matched by movzx.
+ if (Opcode != ISD::AND)
+ return false;
// Make sure the RHS really is a constant.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -33254,10 +33285,10 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
// Find the next power of 2 width, rounding up to a byte.
Width = PowerOf2Ceil(std::max(Width, 8U));
// Truncate the width to size to handle illegal types.
- Width = std::min(Width, Size);
+ Width = std::min(Width, EltSize);
// Calculate a possible zero extend mask for this constant.
- APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);
+ APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width);
// If we aren't changing the mask, just return true to keep it and prevent
// the caller from optimizing.
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index ce3c7d1cb8ac..f23900e8b8f8 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -30,19 +30,20 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSE2-NEXT: movaps {{.*#+}} xmm4 = <1,1,u,u>
-; SSE2-NEXT: xorps %xmm5, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,1,3,3]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movaps %xmm5, %xmm6
+; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[2,1],xmm5[3,3]
+; SSE2-NEXT: psllq $63, %xmm6
+; SSE2-NEXT: psrad $31, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,1,1,3]
+; SSE2-NEXT: xorps %xmm4, %xmm5
; SSE2-NEXT: psllq $63, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm5, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,1,1,3]
-; SSE2-NEXT: psllq $63, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm3, %xmm0
@@ -56,10 +57,11 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE4-NEXT: movdqa %xmm4, %xmm5
; SSE4-NEXT: pcmpgtq %xmm2, %xmm5
; SSE4-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
-; SSE4-NEXT: pxor {{.*}}(%rip), %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm5, %xmm6
; SSE4-NEXT: psllq $63, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm3
-; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero
+; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero
; SSE4-NEXT: psllq $63, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE4-NEXT: movapd %xmm2, %xmm0
@@ -72,9 +74,8 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vxorpd {{.*}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -82,7 +83,6 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpsllq $63, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%3 = icmp sgt <4 x i64> %0, %1
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index aade54b0b02e..34f3ad697ad5 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -16,8 +16,6 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;