[llvm] r307048 - [X86][SSE4A] Add support for combining from EXTRQI/INSERTQI shuffles

Mon Jul 3 13:58:16 PDT 2017

Author: rksimon
Date: Mon Jul  3 13:58:16 2017
New Revision: 307048

URL: http://llvm.org/viewvc/llvm-project?rev=307048&view=rev
Log:
[X86][SSE4A] Add support for combining from EXTRQI/INSERTQI shuffles

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307048&r1=307047&r2=307048&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jul  3 13:58:16 2017
@@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opc
   case X86ISD::PSHUFLW:
   case X86ISD::SHUFP:
   case X86ISD::INSERTPS:
+  case X86ISD::EXTRQI:
+  case X86ISD::INSERTQI:
   case X86ISD::PALIGNR:
   case X86ISD::VSHLDQ:
   case X86ISD::VSRLDQ:
@@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode
     DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
     break;
+  case X86ISD::EXTRQI:
+    if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(1)) &&
+        isa<ConstantSDNode>(N->getOperand(2))) {
+      int BitLen = N->getConstantOperandVal(1);
+      int BitIdx = N->getConstantOperandVal(2);
+      DecodeEXTRQIMask(BitLen, BitIdx, Mask);
+      IsUnary = true;
+    }
+    break;
+  case X86ISD::INSERTQI:
+    if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(2)) &&
+      isa<ConstantSDNode>(N->getOperand(3))) {
+      int BitLen = N->getConstantOperandVal(2);
+      int BitIdx = N->getConstantOperandVal(3);
+      DecodeINSERTQIMask(BitLen, BitIdx, Mask);
+      IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+    }
+    break;
   case X86ISD::UNPCKH:
     DecodeUNPCKHMask(VT, Mask);
     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -35544,6 +35564,8 @@ SDValue X86TargetLowering::PerformDAGCom
   case X86ISD::PINSRW:      return combineVectorInsert(N, DAG, DCI, Subtarget);
   case X86ISD::SHUFP:       // Handle all target specific shuffles
   case X86ISD::INSERTPS:
+  case X86ISD::EXTRQI:
+  case X86ISD::INSERTQI:
   case X86ISD::PALIGNR:
   case X86ISD::VSHLDQ:
   case X86ISD::VSRLDQ:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll?rev=307048&r1=307047&r2=307048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll Mon Jul  3 13:58:16 2017
@@ -11,14 +11,12 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.
 define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
 ; SSE-LABEL: combine_extrqi_pshufb_16i8:
 ; SSE:       # BB#0:
-; SSE-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_extrqi_pshufb_16i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -53,20 +51,19 @@ define <8 x i16> @combine_extrqi_pshufb_
 define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; SSSE3-NEXT:    movd %eax, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_insertqi_pshufb_16i8:
 ; SSE42:       # BB#0:
-; SSE42-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: combine_insertqi_pshufb_16i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll?rev=307048&r1=307047&r2=307048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll Mon Jul  3 13:58:16 2017
@@ -66,10 +66,7 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(
 ;
 ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
 ; BTVER1:       # BB#0:
-; BTVER1-NEXT:    movdqa %xmm0, %xmm1
-; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
 ; BTVER1-NEXT:    retq
 ;
 ; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
@@ -91,10 +88,7 @@ define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(
 ;
 ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
 ; BTVER1:       # BB#0:
-; BTVER1-NEXT:    movdqa %xmm0, %xmm1
-; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
 ; BTVER1-NEXT:    retq
 ;
 ; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz: