[llvm] r215089 - [x86] Fix a miscompile in the new shuffle lowering found through the new

Thu Aug 7 01:11:31 PDT 2014

Author: chandlerc
Date: Thu Aug  7 03:11:31 2014
New Revision: 215089

URL: http://llvm.org/viewvc/llvm-project?rev=215089&view=rev
Log:
[x86] Fix a miscompile in the new shuffle lowering found through the new
fuzz testing.

The function which tested for adjacency did what it said on the tin, but
when I called it, I wanted it to do something more thorough: I wanted to
know if the *pairs* of shuffle elements were adjacent and started at
0 mod 2. In one place I had the decency to try to test for this, but in
the other it was completely skipped, miscompiling this test case. Fix
this by making the helper actually do what I wanted it to do everywhere
I called it (and removing the now redundant code in one place).

I *really* dislike the name "canWidenShuffleElements" for this
predicate. If anyone can come up with a better name, please let me know.
The other name I thought about was "canWidenShuffleMask" but is it
really widening the mask to reduce the number of lanes shuffled? I don't
know. Naming things is hard.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=215089&r1=215088&r2=215089&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug  7 03:11:31 2014
@@ -8234,10 +8234,11 @@ static SDValue lower128BitVectorShuffle(
   }
 }
 
-/// \brief Tiny helper function to test whether adjacent masks are sequential.
-static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
+/// \brief Tiny helper function to test whether a shuffle mask could be
+/// simplified by widening the elements being shuffled.
+static bool canWidenShuffleElements(ArrayRef<int> Mask) {
   for (int i = 0, Size = Mask.size(); i < Size; i += 2)
-    if (Mask[i] + 1 != Mask[i+1])
+    if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i+1])
       return false;
 
   return true;
@@ -8291,7 +8292,7 @@ static SDValue lowerVectorShuffle(SDValu
   // but it might be interesting to form i128 integers to handle flipping the
   // low and high halves of AVX 256-bit vectors.
   if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
-      areAdjacentMasksSequential(Mask)) {
+      canWidenShuffleElements(Mask)) {
     SmallVector<int, 8> NewMask;
     for (int i = 0, Size = Mask.size(); i < Size; i += 2)
       NewMask.push_back(Mask[i] / 2);
@@ -19517,8 +19518,7 @@ static SDValue PerformTargetShuffleCombi
 
     // See if this reduces to a PSHUFD which is no more expensive and can
     // combine with more operations.
-    if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
-        areAdjacentMasksSequential(Mask)) {
+    if (canWidenShuffleElements(Mask)) {
       int DMask[] = {-1, -1, -1, -1};
       int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
       DMask[DOffset + 0] = DOffset + Mask[0] / 2;

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=215089&r1=215088&r2=215089&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Aug  7 03:11:31 2014
@@ -17,6 +17,13 @@ define <4 x i32> @shuffle_v4i32_0020(<4
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
   ret <4 x i32> %shuffle
 }
+define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0112
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,1,1,2]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-SSE2-LABEL: @shuffle_v4i32_0300
 ; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]