[llvm] r329509 - [DAGCombiner] Add a combine to turn a build vector of zero extends of extract vector elts into a vector zero extend and possibly an extract subvector.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 7 12:09:50 PDT 2018
Author: ctopper
Date: Sat Apr 7 12:09:50 2018
New Revision: 329509
URL: http://llvm.org/viewvc/llvm-project?rev=329509&view=rev
Log:
[DAGCombiner] Add a combine to turn a build vector of zero extends of extract vector elts into a vector zero extend and possibly an extract subvector.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
llvm/trunk/test/CodeGen/X86/mulvi32.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=329509&r1=329508&r2=329509&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Apr 7 12:09:50 2018
@@ -454,6 +454,7 @@ namespace {
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
@@ -14977,6 +14978,54 @@ SDValue DAGCombiner::reduceBuildVecToShu
return Shuffles[0];
}
+// Try to turn a build vector of zero extends of extract vector elts into a
+// a vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend or any extend?
+// TODO: Allow undef elements?
+// TODO: Don't require the extracts to start at element 0.
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+ if (LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> int64_t {
+ if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+ return C->getZExtValue();
+ return -1;
+ };
+
+ // Make sure the first element matches
+ // (zext (extract_vector_elt X, C))
+ int64_t Offset = checkElem(Op0);
+ if (Offset < 0)
+ return SDValue();
+
+ unsigned NumElems = N->getNumOperands();
+ SDValue In = Op0.getOperand(0).getOperand(0);
+ EVT InSVT = In.getValueType().getScalarType();
+ EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+ // Don't create an illegal input type after type legalization.
+ if (LegalTypes && !TLI.isTypeLegal(InVT))
+ return SDValue();
+
+ // Ensure all the elements come from the same vector and are adjacent.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ if ((Offset + i) != checkElem(N->getOperand(i)))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+ Op0.getOperand(0).getOperand(1));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+}
+
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -15036,6 +15085,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
Op0.getOperand(0), Op0.getOperand(1));
}
+ if (SDValue V = convertBuildVecZextToZext(N))
+ return V;
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
Modified: llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll?rev=329509&r1=329508&r2=329509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll Sat Apr 7 12:09:50 2018
@@ -36,18 +36,14 @@ define float @g(<4 x i16>* nocapture %in
ret float %3
}
-; The backend generates for the following code an
-; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to 4 x i16)))
-;
-; The and is not redundant and cannot be removed. Since
-; extract_vector_elt is doing an implicit any_ext, the and
-; is required to guarantee that the top bits are set to zero.
-
-; Ideally should be a zext from <4 x i8> to <4 x 32>.
+; Make sure we generate zext from <4 x i8> to <4 x 32>.
; CHECK-LABEL: h:
; CHECK: vld1.32
-; CHECK: uxtb
+; CHECK: vmovl.u8 q8, d16
+; CHECK: vmovl.u16 q8, d16
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
define <4 x i32> @h(<4 x i8> *%in) {
%1 = load <4 x i8>, <4 x i8>* %in, align 4
%2 = extractelement <4 x i8> %1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/mulvi32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulvi32.ll?rev=329509&r1=329508&r2=329509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulvi32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulvi32.ll Sat Apr 7 12:09:50 2018
@@ -167,37 +167,34 @@ define <4 x i64> @_mul4xi32toi64a(<4 x i
;
; SSE42-LABEL: _mul4xi32toi64a:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm3, %xmm3
+; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE42-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
-; SSE42-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; SSE42-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE42-NEXT: pmuludq %xmm0, %xmm1
-; SSE42-NEXT: pmuludq %xmm4, %xmm2
-; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; SSE42-NEXT: pmuludq %xmm3, %xmm2
+; SSE42-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; SSE42-NEXT: pmuludq %xmm4, %xmm0
+; SSE42-NEXT: movdqa %xmm2, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: _mul4xi32toi64a:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _mul4xi32toi64a:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%f00 = extractelement <4 x i32> %0, i32 0
More information about the llvm-commits
mailing list