[llvm] r218589 - [x86] Delete a bunch of really bad and totally unnecessary code in the

Sun Sep 28 19:01:20 PDT 2014

Author: chandlerc
Date: Sun Sep 28 21:01:20 2014
New Revision: 218589

URL: http://llvm.org/viewvc/llvm-project?rev=218589&view=rev
Log:
[x86] Delete a bunch of really bad and totally unnecessary code in the
X86 target-specific DAG combining that tried to convert VSELECT nodes
into VECTOR_SHUFFLE nodes that it "knew" would lower into
immediate-controlled blend nodes.

Turns out, we have perfectly good lowering of all these VSELECT nodes,
and indeed that lowering already knows how to handle lowering through
BLENDI to immediate-controlled blend nodes. The code just wasn't getting
used much because this thing forced the world to go through the vector
shuffle lowering. Yuck.

This also exposes that I was too aggressive in avoiding domain crossing
in v218588 with that lowering -- when the other option is to expand into
two 128-bit vectors, it is worth domain crossing. Restore that behavior
now that we have nice tests covering it.

The test updates here fall into two camps. One is where previously we
ended up with an unsigned encoding of the blend operand and now we get
a signed encoding. In most of those places there were elaborate comments
explaining exactly what these operands really mean. Rather than that,
just switch these tests to use the nicely decoded comments that make it
obvious that the final shuffle matches.

The other updates are just removing pointless domain crossing by
blending integers with PBLENDW rather than BLENDPS.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-blend.ll
    llvm/trunk/test/CodeGen/X86/blend-msb.ll
    llvm/trunk/test/CodeGen/X86/sse41-blend.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=218589&r1=218588&r2=218589&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 28 21:01:20 2014
@@ -11797,43 +11797,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(S
   return SDValue();
 }
 
-// This function assumes its argument is a BUILD_VECTOR of constants or
-// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
-// true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
-                                    unsigned &MaskValue) {
-  MaskValue = 0;
-  unsigned NumElems = BuildVector->getNumOperands();
-  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
-  unsigned NumLanes = (NumElems - 1) / 8 + 1;
-  unsigned NumElemsInLane = NumElems / NumLanes;
-
-  // Blend for v16i16 should be symetric for the both lanes.
-  for (unsigned i = 0; i < NumElemsInLane; ++i) {
-    SDValue EltCond = BuildVector->getOperand(i);
-    SDValue SndLaneEltCond =
-        (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
-
-    int Lane1Cond = -1, Lane2Cond = -1;
-    if (isa<ConstantSDNode>(EltCond))
-      Lane1Cond = !isZero(EltCond);
-    if (isa<ConstantSDNode>(SndLaneEltCond))
-      Lane2Cond = !isZero(SndLaneEltCond);
-
-    if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
-      // Lane1Cond != 0, means we want the first argument.
-      // Lane1Cond == 0, means we want the second argument.
-      // The encoding of this argument is 0 for the first argument, 1
-      // for the second. Therefore, invert the condition.
-      MaskValue |= !Lane1Cond << i;
-    else if (Lane1Cond < 0)
-      MaskValue |= !Lane2Cond << i;
-    else
-      return false;
-  }
-  return true;
-}
-
 /// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
 /// instruction.
 static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
@@ -11883,17 +11846,18 @@ static SDValue lowerVSELECTtoBLENDI(SDVa
   } else {
     // Everything else uses a generic blend mask computation with a custom type.
     if (VT.isInteger()) {
-      if (VT.is256BitVector()) {
-        // The 256-bit integer blend instructions are only available on AVX2.
-        if (!Subtarget->hasAVX2())
-          return SDValue();
-
-        // We do the blend on v8i32 for 256-bit integer types.
-        BlendVT = MVT::v8i32;
-      } else {
+      if (VT.is256BitVector())
+        // We cast to floating point types if integer blends aren't available,
+        // and we coerce integer blends when available to occur on the v8i32
+        // type.
+        BlendVT = Subtarget->hasAVX2()
+                      ? MVT::v8i32
+                      : MVT::getVectorVT(
+                            MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
+                            VT.getVectorNumElements());
+      else
         // For 128-bit vectors we do the blend on v8i16 types.
         BlendVT = MVT::v8i16;
-      }
     }
     assert(BlendVT.getVectorNumElements() <= 8 &&
            "Cannot blend more than 8 elements with an immediate!");
@@ -21718,57 +21682,6 @@ matchIntegerMINMAX(SDValue Cond, EVT VT,
   return std::make_pair(Opc, NeedSplit);
 }
 
-static SDValue
-TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
-  SDLoc dl(N);
-  SDValue Cond = N->getOperand(0);
-  SDValue LHS = N->getOperand(1);
-  SDValue RHS = N->getOperand(2);
-
-  if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
-    SDValue CondSrc = Cond->getOperand(0);
-    if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
-      Cond = CondSrc->getOperand(0);
-  }
-
-  MVT VT = N->getSimpleValueType(0);
-  MVT EltVT = VT.getVectorElementType();
-  unsigned NumElems = VT.getVectorNumElements();
-  // There is no blend with immediate in AVX-512.
-  if (VT.is512BitVector())
-    return SDValue();
-
-  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
-    return SDValue();
-  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
-    return SDValue();
-
-  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
-    return SDValue();
-
-  // A vselect where all conditions and data are constants can be optimized into
-  // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
-  if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
-      ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
-    return SDValue();
-
-  unsigned MaskValue = 0;
-  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
-    return SDValue();
-
-  SmallVector<int, 8> ShuffleMask(NumElems, -1);
-  for (unsigned i = 0; i < NumElems; ++i) {
-    // Be sure we emit undef where we can.
-    if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
-      ShuffleMask[i] = -1;
-    else
-      ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
-  }
-
-  return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
-}
-
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
 /// nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -22318,23 +22231,6 @@ static SDValue PerformSELECTCombine(SDNo
       DCI.CommitTargetLoweringOpt(TLO);
   }
 
-  // We should generate an X86ISD::BLENDI from a vselect if its argument
-  // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
-  // constants. This specific pattern gets generated when we split a
-  // selector for a 512 bit vector in a machine without AVX512 (but with
-  // 256-bit vectors), during legalization:
-  //
-  // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
-  //
-  // Iff we find this pattern and the build_vectors are built from
-  // constants, we translate the vselect into a shuffle_vector that we
-  // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
-  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
-    SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
-    if (Shuffle.getNode())
-      return Shuffle;
-  }
-
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-blend.ll?rev=218589&r1=218588&r2=218589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-blend.ll Sun Sep 28 21:01:20 2014
@@ -21,7 +21,7 @@ define <4 x float> @vsel_float(<4 x floa
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps   $10, %xmm1, %xmm0, %xmm0
+;CHECK: vpblendw {{.*}} ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -61,13 +61,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1,
 
 ;CHECK-LABEL: vsel_float8:
 ;CHECK-NOT: vinsertf128
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK: ret
 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -76,7 +70,7 @@ define <8 x float> @vsel_float8(<8 x flo
 
 ;CHECK-LABEL: vsel_i328:
 ;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK-NEXT: ret
 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2

Modified: llvm/trunk/test/CodeGen/X86/blend-msb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/blend-msb.ll?rev=218589&r1=218588&r2=218589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/blend-msb.ll (original)
+++ llvm/trunk/test/CodeGen/X86/blend-msb.ll Sun Sep 28 21:01:20 2014
@@ -22,17 +22,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1,
 }
 
 ;CHECK-LABEL: vsel_8xi16:
-; The select mask is
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-;CHECK: pblendw $238, %xmm1, %xmm0
+;CHECK: pblendw {{.*}} ## xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2

Modified: llvm/trunk/test/CodeGen/X86/sse41-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-blend.ll?rev=218589&r1=218588&r2=218589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-blend.ll Sun Sep 28 21:01:20 2014
@@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x floa
 
 
 ;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
@@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1,
 }
 
 ;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
@@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2