[llvm] r356989 - [TargetLowering] Add SimplifyDemandedBits support for ISD::INSERT_VECTOR_ELT
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 05:32:02 PDT 2019
Author: rksimon
Date: Tue Mar 26 05:32:01 2019
New Revision: 356989
URL: http://llvm.org/viewvc/llvm-project?rev=356989&view=rev
Log:
[TargetLowering] Add SimplifyDemandedBits support for ISD::INSERT_VECTOR_ELT
This helps us relax the extension of a lot of scalar elements before they are inserted into a vector.
It exposes an issue in DAGCombiner::convertBuildVecZextToZext, as some/all of the zero-extensions may be relaxed to ANY_EXTEND, so we need to handle that case to avoid a couple of AVX2 VPMOVZX test regressions.
Once this is in, it should be easier to fix a number of remaining failures to fold loads into VBROADCAST nodes.
Differential Revision: https://reviews.llvm.org/D59484
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
llvm/trunk/test/CodeGen/X86/mulvi32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Mar 26 05:32:01 2019
@@ -16824,7 +16824,7 @@ SDValue DAGCombiner::reduceBuildVecToShu
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
// TODO: Allow undef elements?
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
@@ -16832,9 +16832,12 @@ SDValue DAGCombiner::convertBuildVecZext
EVT VT = N->getValueType(0);
+ bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
- if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Op.getOpcode() == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@@ -16866,7 +16869,8 @@ SDValue DAGCombiner::convertBuildVecZext
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue Mar 26 05:32:01 2019
@@ -557,6 +557,44 @@ bool TargetLowering::SimplifyDemandedBit
Known.Zero &= Known2.Zero;
}
return false; // Don't fall through, will infinitely loop.
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+
+ // If index isn't constant, assume we need all vector elements AND the
+ // inserted element.
+ APInt DemandedVecElts(OriginalDemandedElts);
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ DemandedVecElts.clearBit(Idx);
+
+ // Inserted element is not required.
+ if (!OriginalDemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ }
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = OriginalDemandedBits.zextOrTrunc(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ Known = KnownScl.zextOrTrunc(BitWidth, false);
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
+ KnownVec, TLO, Depth + 1))
+ return true;
+
+ if (!!DemandedVecElts) {
+ Known.One &= KnownVec.One;
+ Known.Zero &= KnownVec.Zero;
+ }
+
+ return false;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Tue Mar 26 05:32:01 2019
@@ -144,9 +144,9 @@ define float @signbits_ashr_insert_ashr_
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: shrdl $30, %ecx, %eax
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $30, %ecx
+; X32-NEXT: shll $2, %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT: vpsrlq $3, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/mulvi32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulvi32.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulvi32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulvi32.ll Tue Mar 26 05:32:01 2019
@@ -312,18 +312,10 @@ define <4 x i64> @_mul4xi32toi64c(<4 x i
; %ext0 = zext <2 x i32> %0 to <2 x i64>
; %ext1 = zext <2 x i32> %1 to <2 x i64>
define <2 x i64> @_mul2xi64toi64a(<2 x i64>, <2 x i64>) {
-; SSE2-LABEL: _mul2xi64toi64a:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pmuludq %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: _mul2xi64toi64a:
-; SSE42: # %bb.0:
-; SSE42-NEXT: pmuludq %xmm1, %xmm0
-; SSE42-NEXT: retq
+; SSE-LABEL: _mul2xi64toi64a:
+; SSE: # %bb.0:
+; SSE-NEXT: pmuludq %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: _mul2xi64toi64a:
; AVX: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Tue Mar 26 05:32:01 2019
@@ -1736,7 +1736,7 @@ define <16 x i8> @insert_dup_mem_v16i8_i
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsbl (%rdi), %eax
+; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
@@ -1745,7 +1745,7 @@ define <16 x i8> @insert_dup_mem_v16i8_s
;
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsbl (%rdi), %eax
+; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
@@ -1753,7 +1753,7 @@ define <16 x i8> @insert_dup_mem_v16i8_s
;
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movsbl (%rdi), %eax
+; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
@@ -1761,7 +1761,7 @@ define <16 x i8> @insert_dup_mem_v16i8_s
;
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Tue Mar 26 05:32:01 2019
@@ -2652,7 +2652,7 @@ define <8 x i16> @insert_dup_mem_v8i16_i
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSE: # %bb.0:
-; SSE-NEXT: movswl (%rdi), %eax
+; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@@ -2660,7 +2660,7 @@ define <8 x i16> @insert_dup_mem_v8i16_s
;
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@@ -2668,14 +2668,14 @@ define <8 x i16> @insert_dup_mem_v8i16_s
;
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: movswl (%rdi), %eax
+; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Tue Mar 26 05:32:01 2019
@@ -4722,7 +4722,7 @@ define <16 x i16> @insert_dup_mem_v16i16
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@@ -4731,14 +4731,14 @@ define <16 x i16> @insert_dup_mem_v16i16
;
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: movswl (%rdi), %eax
+; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Tue Mar 26 05:32:01 2019
@@ -3154,7 +3154,7 @@ define <32 x i8> @insert_dup_mem_v32i8_i
define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) {
; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=356989&r1=356988&r2=356989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Tue Mar 26 05:32:01 2019
@@ -233,7 +233,7 @@ define <32 x i16> @insert_dup_mem_v32i16
define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
; KNL: ## %bb.0:
-; KNL-NEXT: movswl (%rdi), %eax
+; KNL-NEXT: movzwl (%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
@@ -241,7 +241,7 @@ define <32 x i16> @insert_dup_mem_v32i16
;
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## %bb.0:
-; SKX-NEXT: movswl (%rdi), %eax
+; SKX-NEXT: movzwl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
More information about the llvm-commits
mailing list