[llvm] r285381 - Revert "[DAGCombiner] Add vector demanded elements support to computeKnownBits"
Juergen Ributzka via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 27 21:01:13 PDT 2016
Author: ributzka
Date: Thu Oct 27 23:01:12 2016
New Revision: 285381
URL: http://llvm.org/viewvc/llvm-project?rev=285381&view=rev
Log:
Revert "[DAGCombiner] Add vector demanded elements support to computeKnownBits"
This seems to have increased LTO compile time bejond 2x of previous builds.
See http://lab.llvm.org:8080/green/job/clang-stage2-configure-Rlto/10676/
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll
llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=285381&r1=285380&r2=285381&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Thu Oct 27 23:01:12 2016
@@ -1256,22 +1256,12 @@ public:
const;
/// Determine which bits of Op are known to be either zero or one and return
- /// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
- /// those that are shared by every vector element.
- /// Targets can implement the computeKnownBitsForTargetNode method in the
- /// TargetLowering class to allow target nodes to be understood.
+ /// them in the KnownZero/KnownOne bitsets. Targets can implement the
+ /// computeKnownBitsForTargetNode method in the TargetLowering class to allow
+ /// target nodes to be understood.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
unsigned Depth = 0) const;
- /// Determine which bits of Op are known to be either zero or one and return
- /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
- /// us to only collect the known bits that are shared by the requested vector
- /// elements.
- /// Targets can implement the computeKnownBitsForTargetNode method in the
- /// TargetLowering class to allow target nodes to be understood.
- void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
- const APInt &DemandedElts, unsigned Depth = 0) const;
-
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit
/// is set.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=285381&r1=285380&r2=285381&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Oct 27 23:01:12 2016
@@ -2007,26 +2007,9 @@ static const APInt *getValidShiftAmountC
}
/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-/// those that are shared by every vector element.
+/// them in the KnownZero/KnownOne bitsets.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, unsigned Depth) const {
- EVT VT = Op.getValueType();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
- : APInt(1, 1);
- computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
-}
-
-/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-/// us to only collect the known bits that are shared by the requested vector
-/// elements.
-/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
-/// should be added when they become necessary.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const APInt &DemandedElts,
- unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
@@ -2034,10 +2017,6 @@ void SelectionDAG::computeKnownBits(SDVa
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
- unsigned NumElts = DemandedElts.getBitWidth();
-
- if (DemandedElts == APInt(NumElts, 0))
- return; // No demanded elts, better to assume we don't know anything.
switch (Op.getOpcode()) {
case ISD::Constant:
@@ -2046,15 +2025,9 @@ void SelectionDAG::computeKnownBits(SDVa
KnownZero = ~KnownOne;
break;
case ISD::BUILD_VECTOR:
- // Collect the known bits that are shared by every demanded vector element.
- assert(NumElts == Op.getValueType().getVectorNumElements() &&
- "Unexpected vector size");
+ // Collect the known bits that are shared by every vector element.
KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- if (!DemandedElts[i])
- continue;
-
- SDValue SrcOp = Op.getOperand(i);
+ for (SDValue SrcOp : Op->ops()) {
computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
@@ -2065,72 +2038,16 @@ void SelectionDAG::computeKnownBits(SDVa
KnownZero2 = KnownZero2.trunc(BitWidth);
}
- // Known bits are the values that are shared by every demanded element.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- }
- break;
- case ISD::VECTOR_SHUFFLE: {
- // Collect the known bits that are shared by every vector element referenced
- // by the shuffle.
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- int M = SVN->getMaskElt(i);
- if (M < 0) {
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- // FIXME: Is this too pessimistic?
- KnownZero = KnownOne = APInt(BitWidth, 0);
- break;
- }
- if (!DemandedElts[i])
- continue;
-
- if ((unsigned)M < NumElts)
- DemandedLHS.setBit((unsigned)M % NumElts);
- else
- DemandedRHS.setBit((unsigned)M % NumElts);
- }
- // Known bits are the values that are shared by every demanded element.
- if (DemandedLHS != APInt(NumElts, 0)) {
- SDValue LHS = Op.getOperand(0);
- computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- }
- if (DemandedRHS != APInt(NumElts, 0)) {
- SDValue RHS = Op.getOperand(1);
- computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
+ // Known bits are the values that are shared by every element.
+ // TODO: support per-element known bits.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
}
break;
- }
- case ISD::EXTRACT_SUBVECTOR: {
- // If we know the element index, just demand that subvector elements,
- // otherwise demand them all.
- SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
- computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
- } else {
- computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
- }
- break;
- }
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -2289,8 +2206,7 @@ void SelectionDAG::computeKnownBits(SDVa
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownOne &= InputDemandedBits;
KnownZero &= InputDemandedBits;
@@ -2337,8 +2253,7 @@ void SelectionDAG::computeKnownBits(SDVa
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -2351,8 +2266,7 @@ void SelectionDAG::computeKnownBits(SDVa
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
@@ -2525,28 +2439,16 @@ void SelectionDAG::computeKnownBits(SDVa
// At the moment we keep this simple and skip tracking the specific
// element. This way we get the lowest common denominator for all elements
// of the vector.
- SDValue InVec = Op.getOperand(0);
- SDValue EltNo = Op.getOperand(1);
- EVT VecVT = InVec.getValueType();
+ // TODO: get information for given vector element
const unsigned BitWidth = Op.getValueSizeInBits();
- const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
- const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.
if (BitWidth > EltBitWidth) {
KnownZero = KnownZero.trunc(EltBitWidth);
KnownOne = KnownOne.trunc(EltBitWidth);
}
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
- if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
- // If we know the element index, just demand that vector element.
- unsigned Idx = ConstEltNo->getZExtValue();
- APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
- }
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (BitWidth > EltBitWidth) {
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
Modified: llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll?rev=285381&r1=285380&r2=285381&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll Thu Oct 27 23:01:12 2016
@@ -465,9 +465,11 @@ define <4 x i64> @shuffle_v4i64_67zz(<4
; AVX1-LABEL: shuffle_v4i64_67zz:
; AVX1: ## BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_67zz:
Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=285381&r1=285380&r2=285381&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Thu Oct 27 23:01:12 2016
@@ -5,14 +5,16 @@
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpextrw $0, %xmm0, %eax
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vmovd %xmm0, %eax
+; X32-NEXT: cwtl
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpextrw $0, %xmm0, %eax
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: cwtl
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%2 = extractelement <8 x i16> %1, i32 0
@@ -29,10 +31,17 @@ define float @knownbits_mask_extract_uit
; X32-NEXT: subl $16, %esp
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X32-NEXT: vpextrd $1, %xmm0, %eax
; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setns %cl
; X32-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X32-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovss %xmm0, (%esp)
+; X32-NEXT: flds (%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
@@ -42,7 +51,18 @@ define float @knownbits_mask_extract_uit
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT: vmovq %xmm0, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: js .LBB1_1
+; X64-NEXT: # BB#2:
+; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT: retq
+; X64-NEXT: .LBB1_1:
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: shrq %rcx
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: orq %rcx, %rax
; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT: vaddss %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>
%2 = extractelement <2 x i64> %1, i32 0
@@ -54,15 +74,15 @@ define <4 x i32> @knownbits_mask_shuffle
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-NEXT: vpmovsxwd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -75,14 +95,22 @@ define <4 x float> @knownbits_mask_shuff
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
+; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X32-NEXT: vpsrld $16, %xmm0, %xmm0
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
+; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X64-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
More information about the llvm-commits
mailing list