[llvm] r307254 - [X86][SSE4A] Add support for shuffle combining to EXTRQ.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 05:22:58 PDT 2017
Author: rksimon
Date: Thu Jul 6 05:22:58 2017
New Revision: 307254
URL: http://llvm.org/viewvc/llvm-project?rev=307254&view=rev
Log:
[X86][SSE4A] Add support for shuffle combining to EXTRQ.
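For context, EXTRQ/EXTRQI extract a contiguous bit field (BitLen bits starting
at BitIdx) from the low 64 bits of the source register, zero the remaining bits
of the low 64 bits, and leave the upper 64 bits undefined. That is why shuffles
that take a run of lower-half elements followed by zeros, with an undefined
upper half, can now be combined to a single extrq. A minimal sketch of the
low-64-bit behavior, purely illustrative and not taken from the patch (the
helper name and the in-range BitLen/BitIdx assumption are mine):

  // Model of EXTRQI on the low 64 bits only. Assumes 0 < BitLen <= 64 and
  // BitIdx + BitLen <= 64; out-of-range encodings are architecturally
  // undefined, and a BitLen encoding of 0 means 64 in the real instruction.
  #include <cstdint>
  static uint64_t extrqiLo64(uint64_t SrcLo, unsigned BitLen, unsigned BitIdx) {
    uint64_t Field = SrcLo >> BitIdx;          // shift the field down to bit 0
    if (BitLen < 64)
      Field &= (UINT64_C(1) << BitLen) - 1;    // clear the bits above the field
    return Field;                              // bits [BitLen, 63] are now zero
  }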
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307254&r1=307253&r2=307254&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 6 05:22:58 2017
@@ -9364,7 +9364,7 @@ static bool matchVectorShuffleAsEXTRQ(MV
int Idx = -1;
for (int i = 0; i != Len; ++i) {
int M = Mask[i];
- if (M < 0)
+ if (M == SM_SentinelUndef)
continue;
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
@@ -27696,6 +27696,33 @@ static bool combineX86ShuffleChain(Array
return true;
}
+ // Annoyingly, SSE4A instructions don't map into the above match helpers.
+ if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
+ ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
+
+ APInt Zeroable(NumMaskElts, 0);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (isUndefOrZero(Mask[i]))
+ Zeroable.setBit(i);
+
+ uint64_t BitLen, BitIdx;
+ if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
+ Zeroable)) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+ return false; // Nothing to do!
+ V1 = DAG.getBitcast(ShuffleVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ Res = DAG.getNode(X86ISD::EXTRQI, DL, ShuffleVT, V1,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
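To make the new combine concrete, here is how the first test in the updated
test file below maps onto EXTRQI (my own reading of matchVectorShuffleAsEXTRQ,
not wording from the patch): the v16i8 mask <1,2,zero,zero,zero,zero,zero,zero,
undef x 8> has an undefined upper half and a non-zeroable prefix of Len = 2 byte
elements starting at source index 1, so the matcher returns BitLen = 2 * 8 = 16
and BitIdx = 1 * 8 = 8, i.e. a single extrq that extracts 16 bits at bit offset 8
and zeroes the rest of the low 64 bits, which is exactly the xmm0[1,2],zero,...
pattern the updated CHECK lines expect. The SM_SentinelUndef change in the first
hunk presumably matters for the same reason: in the combine path a mask element
can also be SM_SentinelZero, and such an element must not be skipped as a
don't-care, since EXTRQ cannot zero a lane inside the extracted field.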
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll?rev=307254&r1=307253&r2=307254&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll Thu Jul 6 05:22:58 2017
@@ -9,30 +9,20 @@
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_16i8:
-; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT: retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_16i8:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT: retq
+; ALL-LABEL: combine_extrqi_pshufb_16i8:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
ret <16 x i8> %2
}
define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_8i16:
-; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT: retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_8i16:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT: retq
+; ALL-LABEL: combine_extrqi_pshufb_8i16:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = bitcast <8 x i16> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -43,9 +33,8 @@ define <8 x i16> @combine_extrqi_pshufb_
define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_16i8:
@@ -65,9 +54,8 @@ define <16 x i8> @combine_insertqi_pshuf
define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_8i16:
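As far as I can tell (this is my reading of the SSSE3 diffs above, not part of
the commit message), the insertqi tests improve because the combined shuffle
keeps only the low 16 bits of %a1 and zeroes bits 16 to 63; previously that was
lowered by materializing a 0xFFFF constant and ANDing it in, and it is now
recognized as an EXTRQ with BitLen = 16 and BitIdx = 0, so the three-instruction
sequence collapses to extrq plus a movdqa to move the result back into xmm0.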