[llvm] r307268 - [X86][SSE4A] Add support for shuffle combining to INSERTQI.

Thu Jul 6 08:34:17 PDT 2017

Author: rksimon
Date: Thu Jul  6 08:34:17 2017
New Revision: 307268

URL: http://llvm.org/viewvc/llvm-project?rev=307268&view=rev
Log:
[X86][SSE4A] Add support for shuffle combining to INSERTQI.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307268&r1=307267&r2=307268&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul  6 08:34:17 2017
@@ -27714,6 +27714,22 @@ static bool combineX86ShuffleChain(Array
                     /*AddTo*/ true);
       return true;
     }
+
+    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+        return false; // Nothing to do!
+      V1 = DAG.getBitcast(IntMaskVT, V1);
+      DCI.AddToWorklist(V1.getNode());
+      V2 = DAG.getBitcast(IntMaskVT, V2);
+      DCI.AddToWorklist(V2.getNode());
+      Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+                        DAG.getConstant(BitLen, DL, MVT::i8),
+                        DAG.getConstant(BitIdx, DL, MVT::i8));
+      DCI.AddToWorklist(Res.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                    /*AddTo*/ true);
+      return true;
+    }
   }
 
   // Don't try to re-form single instruction chains under any circumstances now

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll?rev=307268&r1=307267&r2=307268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll Thu Jul  6 08:34:17 2017
@@ -75,19 +75,10 @@ define <8 x i16> @combine_insertqi_pshuf
 }
 
 define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
-; SSE-LABEL: combine_pshufb_insertqi_pshufb:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_pshufb_insertqi_pshufb:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_pshufb_insertqi_pshufb:
+; ALL:       # BB#0:
+; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
   %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)