[llvm] r230695 - [x86] Fix PR22706 where we would incorrectly try lower a v32i8 dynamic

Thu Feb 26 14:15:34 PST 2015

Author: chandlerc
Date: Thu Feb 26 16:15:34 2015
New Revision: 230695

URL: http://llvm.org/viewvc/llvm-project?rev=230695&view=rev
Log:
[x86] Fix PR22706 where we would incorrectly try lower a v32i8 dynamic
blend as legal.

We made the same mistake in two different places. Whenever we are custom
lowering a v32i8 blend we need to check whether we are custom lowering
it only for constant conditions that can be shuffled, or whether we
actually have AVX2 and full dynamic blending support on bytes. Both are
fixed, with comments added to make it clear what is going on and a new
test case.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vselect-avx.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=230695&r1=230694&r2=230695&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 26 16:15:34 2015
@@ -10126,24 +10126,31 @@ SDValue X86TargetLowering::LowerVSELECT(
   if (!Subtarget->hasSSE41())
     return SDValue();
 
-  // Some types for vselect were previously set to Expand, not Legal or
-  // Custom. Return an empty SDValue so we fall-through to Expand, after
-  // the Custom lowering phase.
-  MVT VT = Op.getSimpleValueType();
-  switch (VT.SimpleTy) {
+  // Only some types will be legal on some subtargets. If we can emit a legal
+  // VSELECT-matching blend, return Op, and but if we need to expand, return
+  // a null value.
+  switch (Op.getSimpleValueType().SimpleTy) {
   default:
-    break;
+    // Most of the vector types have blends past SSE4.1.
+    return Op;
+
+  case MVT::v32i8:
+    // The byte blends for AVX vectors were introduced only in AVX2.
+    if (Subtarget->hasAVX2())
+      return Op;
+
+    return SDValue();
+
   case MVT::v8i16:
   case MVT::v16i16:
+    // AVX-512 BWI and VLX features support VSELECT with i16 elements.
     if (Subtarget->hasBWI() && Subtarget->hasVLX())
-      break;
+      return Op;
+
+    // FIXME: We should custom lower this by fixing the condition and using i8
+    // blends.
     return SDValue();
   }
-
-  // We couldn't create a "Blend with immediate" node.
-  // This node should still be legal, but we'll have to emit a blendv*
-  // instruction.
-  return Op;
 }
 
 static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
@@ -20784,7 +20791,17 @@ static SDValue PerformSELECTCombine(SDNo
     // lowered.
     if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
       return SDValue();
-    if (!Subtarget->hasSSE41() || VT == MVT::v16i16 || VT == MVT::v8i16)
+    // FIXME: We don't support i16-element blends currently. We could and
+    // should support them by making *all* the bits in the condition be set
+    // rather than just the high bit and using an i8-element blend.
+    if (VT.getScalarType() == MVT::i16)
+      return SDValue();
+    // Dynamic blending was only available from SSE4.1 onward.
+    if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+      return SDValue();
+    // Byte blends are only available in AVX2
+    if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
+        !Subtarget->hasAVX2())
       return SDValue();
 
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");

Modified: llvm/trunk/test/CodeGen/X86/vselect-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-avx.ll?rev=230695&r1=230694&r2=230695&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-avx.ll Thu Feb 26 16:15:34 2015
@@ -79,3 +79,14 @@ define void @test3(<4 x i32> %induction3
   store <4 x i16> %predphi, <4 x i16>* %tmp17, align 8
  ret void
 }
+
+; We shouldn't try to lower this directly using VSELECT because we don't have
+; vpblendvb in AVX1, only in AVX2. Instead, it should be expanded.
+;
+; CHECK-LABEL: PR22706:
+; CHECK: vpcmpgtb
+; CHECK: vpcmpgtb
+define <32 x i8> @PR22706(<32 x i1> %x) {
+  %tmp = select <32 x i1> %x, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %tmp
+}