[llvm] r229642 - [x86] Rewrite the byte shift detection to not use boolean variables to

Tue Feb 17 23:13:49 PST 2015

Author: chandlerc
Date: Wed Feb 18 01:13:48 2015
New Revision: 229642

URL: http://llvm.org/viewvc/llvm-project?rev=229642&view=rev
Log:
[x86] Rewrite the byte shift detection to not use boolean variables to
track state.

I didn't like this in the code review because the pattern tends to be
error prone, but I didn't see a clear way to rewrite it. Turns out that
there were bugs here, I found them when fuzz testing our shuffle
lowering for correctness on x86.

The core of the problem is that we need to consistently test all our
preconditions for the same directionality of shift and the same input
vector. Instead, formulate this as two predicates (one doesn't depend on
the input in any way), pass things like the directionality and input
vector as inputs, and loop over the alternatives.

This fixes a pattern of very rare miscompiles coming out of this code.
Turned up roughly 4 out of every 1 million v8 shuffles in my fuzz
testing. The new code is over half a million test runs with no failures
yet. I've also fuzzed every other function in the lowering code with
over 3.5 million test cases and not discovered any other miscompiles.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=229642&r1=229641&r2=229642&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 18 01:13:48 2015
@@ -7888,40 +7888,39 @@ static SDValue lowerVectorShuffleAsByteS
   // [  5, 6,  7, zz, zz, zz, zz, zz]
   // [ -1, 5,  6,  7, zz, zz, zz, zz]
   // [  1, 2, -1, -1, -1, -1, zz, zz]
-  auto MatchByteShift = [&](int Shift) -> SDValue {
-    bool MatchLeft = true, MatchRight = true;
-    for (int l = 0; l < NumElts; l += NumLaneElts) {
+
+  auto CheckZeros = [&](int Shift, bool LeftShift) {
+    for (int l = 0; l < NumElts; l += NumLaneElts)
       for (int i = 0; i < Shift; ++i)
-        MatchLeft &= Zeroable[l + i];
-      for (int i = NumLaneElts - Shift; i < NumLaneElts; ++i)
-        MatchRight &= Zeroable[l + i];
-    }
-    if (!(MatchLeft || MatchRight))
-      return SDValue();
+        if (!Zeroable[l + i + (LeftShift ? 0 : (NumLaneElts - Shift))])
+          return false;
+
+    return true;
+  };
 
-    bool MatchV1 = true, MatchV2 = true;
+  auto MatchByteShift = [&](int Shift, bool LeftShift, SDValue V) {
     for (int l = 0; l < NumElts; l += NumLaneElts) {
-      unsigned Pos = MatchLeft ? Shift + l : l;
-      unsigned Low = MatchLeft ? l : Shift + l;
+      unsigned Pos = LeftShift ? Shift + l : l;
+      unsigned Low = LeftShift ? l : Shift + l;
       unsigned Len = NumLaneElts - Shift;
-      MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
-      MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + NumElts);
+      if (!isSequentialOrUndefInRange(Mask, Pos, Len,
+                                      Low + (V == V1 ? 0 : NumElts)))
+        return SDValue();
     }
-    if (!(MatchV1 || MatchV2))
-      return SDValue();
 
     int ByteShift = Shift * Scale;
-    unsigned Op = MatchRight ? X86ISD::VSRLDQ : X86ISD::VSHLDQ;
-    SDValue V = MatchV1 ? V1 : V2;
+    unsigned Op = LeftShift ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
     V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
-    V = DAG.getNode(Op, DL, ShiftVT, V,
-                    DAG.getConstant(ByteShift, MVT::i8));
+    V = DAG.getNode(Op, DL, ShiftVT, V, DAG.getConstant(ByteShift, MVT::i8));
     return DAG.getNode(ISD::BITCAST, DL, VT, V);
   };
 
   for (int Shift = 1; Shift < NumLaneElts; ++Shift)
-    if (SDValue S = MatchByteShift(Shift))
-      return S;
+    for (bool LeftShift : {true, false})
+      if (CheckZeros(Shift, LeftShift))
+        for (SDValue V : {V1, V2})
+          if (SDValue S = MatchByteShift(Shift, LeftShift, V))
+            return S;
 
   // no match
   return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=229642&r1=229641&r2=229642&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Wed Feb 18 01:13:48 2015
@@ -2146,3 +2146,35 @@ define <8 x i16> @shuffle_v8i16_0123456z
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   ret <8 x i16> %shuffle
 }
+
+define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
+  ret <8 x i16> %shuffle
+}