[llvm] r215690 - [x86] Fix the very broken formation of vpunpck instructions in the

Chandler Carruth chandlerc at gmail.com
Thu Aug 14 20:54:49 PDT 2014


Author: chandlerc
Date: Thu Aug 14 22:54:49 2014
New Revision: 215690

URL: http://llvm.org/viewvc/llvm-project?rev=215690&view=rev
Log:
[x86] Fix the very broken formation of vpunpck instructions in the
target-specific shuffle DAG combines.

We were recognizing the paired shuffles backwards. This code needs to be
replaced anyways as we have the same functionality elsewhere, but I'll
do the refactoring in a follow-up; this is the minimal fix to the
behavior.

In addition to fixing miscompiles with the new vector shuffle lowering,
it also causes the canonicalization to kick in much better, selecting
the smaller encoding variants in lots of places in the new AVX path.
This still isn't quite ideal as we don't need both the shufpd and the
punpck instructions, but that'll get fixed in a follow-up patch.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-sext.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=215690&r1=215689&r2=215690&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 14 22:54:49 2014
@@ -19496,7 +19496,7 @@ static bool combineX86ShufflesRecursivel
   while (Mask.size() > 1) {
     SmallVector<int, 16> NewMask;
     for (int i = 0, e = Mask.size()/2; i < e; ++i) {
-      if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+      if (Mask[2*i] % 2 != 0 || Mask[2*i] + 1 != Mask[2*i + 1]) {
         NewMask.clear();
         break;
       }

Modified: llvm/trunk/test/CodeGen/X86/avx-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-sext.ll?rev=215690&r1=215689&r2=215690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-sext.ll Thu Aug 14 22:54:49 2014
@@ -156,7 +156,7 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
 
 ; AVX-LABEL: sext_16i8_to_16i16
 ; AVX: vpmovsxbw
-; AVX: vmovhlps
+; AVX: vpunpckhqdq
 ; AVX: vpmovsxbw
 ; AVX: ret
 define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=215690&r1=215689&r2=215690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Aug 14 22:54:49 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-AVX1
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -60,6 +61,14 @@ define <4 x i32> @shuffle_v4i32_3210(<4
   ret <4 x i32> %shuffle
 }
 
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-AVX1-LABEL: @shuffle_v4i32_2121
+; CHECK-AVX1:         vpshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
+; CHECK-AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+  ret <4 x i32> %shuffle
+}
+
 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
 ; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,0,1]

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=215690&r1=215689&r2=215690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Thu Aug 14 22:54:49 2014
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0001
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm1 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
@@ -18,7 +18,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
@@ -41,7 +41,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
@@ -52,7 +52,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4
 ; AVX1-LABEL: @shuffle_v4i64_1000
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
@@ -63,8 +63,8 @@ define <4 x i64> @shuffle_v4i64_2200(<4
 ; AVX1-LABEL: @shuffle_v4i64_2200
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
@@ -76,7 +76,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT:    vpunpckhqdq {{.*}} # xmm1 = xmm1[1,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
@@ -174,7 +174,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4
 ; AVX1-LABEL: @shuffle_v4i64_0124
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -185,7 +185,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4
 ; AVX1-LABEL: @shuffle_v4i64_0142
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -197,7 +197,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -209,7 +209,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -229,7 +229,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -249,7 +249,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; AVX1-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq





More information about the llvm-commits mailing list