[llvm-commits] [llvm] r135729 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/SIMD/notvunpcklpd.ll test/CodeGen/X86/SIMD/notvunpcklps.ll

Thu Jul 21 17:15:07 PDT 2011

Author: bruno
Date: Thu Jul 21 19:15:07 2011
New Revision: 135729

URL: http://llvm.org/viewvc/llvm-project?rev=135729&view=rev
Log:
- Inspected an AVX code block added by someone in early Feb. It was never used
and was actually very wrong; fix it and make it simpler. Also remove the
ConcatVectors function, which is now unused.

- Fix the introduction of useless nodes in r126664 and r126264. The
VUNPCKL* nodes should never be introduced because we don't want duplicate
nodes for 128-bit AVX and non-AVX modes; the actual instruction
difference only exists during isel, not for target-specific DAG
nodes. We only introduce V* target nodes when there is no 128-bit
version already there.

- Fix a fragile test and make it more useful.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklpd.ll
    llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklps.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=135729&r1=135728&r2=135729&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 21 19:15:07 2011
@@ -71,9 +71,6 @@
                                    SelectionDAG &DAG,
                                    DebugLoc dl);
 
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
-
-
 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
 /// sets things up to match to an AVX VEXTRACTF128 instruction or a
 /// simple subregister reference.  Idx is an index in the 128 bits we
@@ -151,34 +148,6 @@
   return SDValue();
 }
 
-/// Given two vectors, concat them.
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
-  DebugLoc dl = Lower.getDebugLoc();
-
-  assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
-
-  EVT VT = EVT::getVectorVT(*DAG.getContext(),
-                            Lower.getValueType().getVectorElementType(),
-                            Lower.getValueType().getVectorNumElements() * 2);
-
-  // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
-  assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
-
-  // Insert the upper subvector.
-  SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
-                                   DAG.getConstant(
-                                     // This is half the length of the result
-                                     // vector.  Start inserting the upper 128
-                                     // bits here.
-                                     Lower.getValueType().getVectorNumElements(),
-                                     MVT::i32),
-                                   DAG, dl);
-
-  // Insert the lower subvector.
-  Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
-  return Vec;
-}
-
 static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
   const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
   bool is64Bit = Subtarget->is64Bit();
@@ -2734,8 +2703,6 @@
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
-  case X86ISD::VUNPCKLPS:
-  case X86ISD::VUNPCKLPD:
   case X86ISD::VUNPCKLPSY:
   case X86ISD::VUNPCKLPDY:
   case X86ISD::PUNPCKLWD:
@@ -2807,8 +2774,6 @@
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
-  case X86ISD::VUNPCKLPS:
-  case X86ISD::VUNPCKLPD:
   case X86ISD::VUNPCKLPSY:
   case X86ISD::VUNPCKLPDY:
   case X86ISD::PUNPCKLWD:
@@ -4111,8 +4076,6 @@
       break;
     case X86ISD::UNPCKLPS:
     case X86ISD::UNPCKLPD:
-    case X86ISD::VUNPCKLPS:
-    case X86ISD::VUNPCKLPD:
     case X86ISD::VUNPCKLPSY:
     case X86ISD::VUNPCKLPDY:
       DecodeUNPCKLPMask(VT, ShuffleMask);
@@ -4545,31 +4508,8 @@
 
   EVT VT = Op.getValueType();
   EVT ExtVT = VT.getVectorElementType();
-
   unsigned NumElems = Op.getNumOperands();
 
-  // For AVX-length vectors, build the individual 128-bit pieces and
-  // use shuffles to put them in place.
-  if (VT.getSizeInBits() > 256 &&
-      Subtarget->hasAVX() &&
-      !ISD::isBuildVectorAllZeros(Op.getNode())) {
-    SmallVector<SDValue, 8> V;
-    V.resize(NumElems);
-    for (unsigned i = 0; i < NumElems; ++i) {
-      V[i] = Op.getOperand(i);
-    }
-
-    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
-
-    // Build the lower subvector.
-    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
-    // Build the upper subvector.
-    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
-                                NumElems/2);
-
-    return ConcatVectors(Lower, Upper, DAG);
-  }
-
   // All zero's:
   //  - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
   // All one's:
@@ -4731,6 +4671,27 @@
   if (IsAllConstants)
     return SDValue();
 
+  // For AVX-length vectors, build the individual 128-bit pieces and use
+  // shuffles to put them in place.
+  if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+    SmallVector<SDValue, 32> V;
+    for (unsigned i = 0; i < NumElems; ++i)
+      V.push_back(Op.getOperand(i));
+
+    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+    // Build both the lower and upper subvector.
+    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+                                NumElems/2);
+
+    // Recreate the wider vector with the lower and upper part.
+    SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+                                DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+    return Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32),
+                              DAG, dl);
+  }
+
   // Let legalizer expand 2-wide build_vectors.
   if (EVTBits == 64) {
     if (NumNonZero == 1) {
@@ -5742,10 +5703,8 @@
   switch(VT.getSimpleVT().SimpleTy) {
   case MVT::v4i32: return X86ISD::PUNPCKLDQ;
   case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
-  case MVT::v4f32:
-    return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
-  case MVT::v2f64:
-    return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+  case MVT::v4f32: return X86ISD::UNPCKLPS;
+  case MVT::v2f64: return X86ISD::UNPCKLPD;
   case MVT::v8f32: return X86ISD::VUNPCKLPSY;
   case MVT::v4f64: return X86ISD::VUNPCKLPDY;
   case MVT::v16i8: return X86ISD::PUNPCKLBW;
@@ -6053,11 +6012,8 @@
 
   if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
       SVOp->getSplatIndex() == 0 && V2IsUndef) {
-    if (VT == MVT::v2f64) {
-      X86ISD::NodeType Opcode =
-        getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
-      return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
-    }
+    if (VT == MVT::v2f64)
+      return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
     if (VT == MVT::v2i64)
       return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
   }
@@ -9725,9 +9681,6 @@
   case X86ISD::MOVSS:              return "X86ISD::MOVSS";
   case X86ISD::UNPCKLPS:           return "X86ISD::UNPCKLPS";
   case X86ISD::UNPCKLPD:           return "X86ISD::UNPCKLPD";
-  case X86ISD::VUNPCKLPS:          return "X86ISD::VUNPCKLPS";
-  case X86ISD::VUNPCKLPD:          return "X86ISD::VUNPCKLPD";
-  case X86ISD::VUNPCKLPSY:         return "X86ISD::VUNPCKLPSY";
   case X86ISD::VUNPCKLPDY:         return "X86ISD::VUNPCKLPDY";
   case X86ISD::UNPCKHPS:           return "X86ISD::UNPCKHPS";
   case X86ISD::UNPCKHPD:           return "X86ISD::UNPCKHPD";
@@ -12588,8 +12541,6 @@
   case X86ISD::PUNPCKLQDQ:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
-  case X86ISD::VUNPCKLPS:
-  case X86ISD::VUNPCKLPD:
   case X86ISD::VUNPCKLPSY:
   case X86ISD::VUNPCKLPDY:
   case X86ISD::MOVHLPS:

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=135729&r1=135728&r2=135729&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Jul 21 19:15:07 2011
@@ -257,8 +257,6 @@
       MOVSS,
       UNPCKLPS,
       UNPCKLPD,
-      VUNPCKLPS,
-      VUNPCKLPD,
       VUNPCKLPSY,
       VUNPCKLPDY,
       UNPCKHPS,

Modified: llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklpd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklpd.ll?rev=135729&r1=135728&r2=135729&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklpd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklpd.ll Thu Jul 21 19:15:07 2011
@@ -13,7 +13,7 @@
 	%r3 = load <4 x double>* %r, align 8
 	%r4 = load <4 x double>* %rb, align 8
 	%r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x double>> [#uses=1]
-; CHECK-NOT: vunpcklpd
+; CHECK-NOT: vunpcklpd %ymm
 	%r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
 	store <4 x double> %r11, <4 x double>* %r12, align 4
 	ret void

Modified: llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklps.ll?rev=135729&r1=135728&r2=135729&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklps.ll (original)
+++ llvm/trunk/test/CodeGen/X86/SIMD/notvunpcklps.ll Thu Jul 21 19:15:07 2011
@@ -13,7 +13,7 @@
 	%r3 = load <8 x float>* %r, align 8
 	%r4 = load <8 x float>* %rb, align 8
 	%r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x float>> [#uses=1]
-; CHECK-NOT: vunpcklps
+; CHECK-NOT: vunpcklps %ymm
 	%r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0
 	store <8 x float> %r8, <8 x float>* %r9, align 4
 	ret void