[llvm] r350084 - [X86] Remove check that avoids creating PMULDQ with illegal types. Rely on SplitOpsAndApply to legalize it.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 26 19:37:04 PST 2018


Author: ctopper
Date: Wed Dec 26 19:37:04 2018
New Revision: 350084

URL: http://llvm.org/viewvc/llvm-project?rev=350084&view=rev
Log:
[X86] Remove check that avoids creating PMULDQ with illegal types. Rely on SplitOpsAndApply to legalize it.

Create PMULDQ/PMULUDQ as long as the number of elements is a power of 2.

This seems to give some improvements in our ability to use SimplifyDemandedBits.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/mulvi32.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350084&r1=350083&r2=350084&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Dec 26 19:37:04 2018
@@ -35067,7 +35067,8 @@ static SDValue combineMulToPMULDQ(SDNode
 
   // Only support vXi64 vectors.
   if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
-      !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+      VT.getVectorNumElements() < 2 ||
+      !isPowerOf2_32(VT.getVectorNumElements()))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);

Modified: llvm/trunk/test/CodeGen/X86/mulvi32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulvi32.ll?rev=350084&r1=350083&r2=350084&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulvi32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulvi32.ll Wed Dec 26 19:37:04 2018
@@ -131,29 +131,24 @@ define <4 x i32> @_mul4xi32b(<4 x i32>,
 define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) {
 ; SSE2-LABEL: _mul4xi32toi64a:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pxor %xmm3, %xmm3
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSE2-NEXT:    pmuludq %xmm4, %xmm2
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    pmuludq %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; SSE2-NEXT:    pmuludq %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,1,3,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: _mul4xi32toi64a:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
-; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
 ; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
 ; SSE42-NEXT:    pmuludq %xmm3, %xmm2
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
-; SSE42-NEXT:    pmuludq %xmm4, %xmm0
-; SSE42-NEXT:    movdqa %xmm2, %xmm1
+; SSE42-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3]
+; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE42-NEXT:    pmuludq %xmm3, %xmm1
+; SSE42-NEXT:    movdqa %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: _mul4xi32toi64a:




More information about the llvm-commits mailing list