[llvm-commits] [llvm] r148537 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Craig Topper craig.topper at gmail.com
Fri Jan 20 01:29:03 PST 2012


Author: ctopper
Date: Fri Jan 20 03:29:03 2012
New Revision: 148537

URL: http://llvm.org/viewvc/llvm-project?rev=148537&view=rev
Log:
Improve 256-bit shuffle splitting to allow 2 sources in each 128-bit lane, as long as only a single lane of each source is used in each lane of the destination. This makes the splitting match much more closely what happens with 256-bit shuffles when AVX is disabled and only 128-bit XMM is allowed.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148537&r1=148536&r2=148537&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 20 03:29:03 2012
@@ -3162,17 +3162,6 @@
   return (Val < 0) || (Val >= Low && Val < Hi);
 }
 
-/// isUndefOrInRange - Return true if every element in Mask, begining
-/// from position Pos and ending in Pos+Size, falls within the specified
-/// range (L, L+Pos]. or is undef.
-static bool isUndefOrInRange(ArrayRef<int> Mask,
-                             int Pos, int Size, int Low, int Hi) {
-  for (int i = Pos, e = Pos+Size; i != e; ++i)
-    if (!isUndefOrInRange(Mask[i], Low, Hi))
-      return false;
-  return true;
-}
-
 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
 /// specified value.
 static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -5948,95 +5937,106 @@
                                              OpVT, SrcOp)));
 }
 
-/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
-/// shuffle node referes to only one lane in the sources.
-static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  int NumElems = VT.getVectorNumElements();
-  int HalfSize = NumElems/2;
-  ArrayRef<int> M = SVOp->getMask();
-  bool MatchA = false, MatchB = false;
-
-  for (int l = 0; l < NumElems*2; l += HalfSize) {
-    if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
-      MatchA = true;
-      break;
-    }
-  }
-
-  for (int l = 0; l < NumElems*2; l += HalfSize) {
-    if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
-      MatchB = true;
-      break;
-    }
-  }
-
-  return MatchA && MatchB;
-}
-
 /// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
 /// which could not be matched by any known target speficic shuffle
 static SDValue
 LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
-  if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
-    // If each half of a vector shuffle node referes to only one lane in the
-    // source vectors, extract each used 128-bit lane and shuffle them using
-    // 128-bit shuffles. Then, concatenate the results. Otherwise leave
-    // the work to the legalizer.
-    DebugLoc dl = SVOp->getDebugLoc();
-    EVT VT = SVOp->getValueType(0);
-    int NumElems = VT.getVectorNumElements();
-    int HalfSize = NumElems/2;
+  EVT VT = SVOp->getValueType(0);
+
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned NumLaneElems = NumElems / 2;
 
-    // Extract the reference for each half
-    int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
-    int FstVecOpNum = 0, SndVecOpNum = 0;
-    for (int i = 0; i < HalfSize; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      if (SVOp->getMaskElt(i) < 0)
+  int MinRange[2][2] = { { static_cast<int>(NumElems),
+                           static_cast<int>(NumElems) },
+                         { static_cast<int>(NumElems),
+                           static_cast<int>(NumElems) } };
+  int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
+
+  // Collect used ranges for each source in each lane
+  for (unsigned l = 0; l < 2; ++l) {
+    unsigned LaneStart = l*NumLaneElems;
+    for (unsigned i = 0; i != NumLaneElems; ++i) {
+      int Idx = SVOp->getMaskElt(i+LaneStart);
+      if (Idx < 0)
         continue;
-      FstVecOpNum = Elt/NumElems;
-      FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
-      break;
+
+      int Input = 0;
+      if (Idx >= (int)NumElems) {
+        Idx -= NumElems;
+        Input = 1;
+      }
+
+      if (Idx > MaxRange[l][Input])
+        MaxRange[l][Input] = Idx;
+      if (Idx < MinRange[l][Input])
+        MinRange[l][Input] = Idx;
     }
-    for (int i = HalfSize; i < NumElems; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      if (SVOp->getMaskElt(i) < 0)
+  }
+
+  // Make sure each range is 128-bits
+  int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
+  for (unsigned l = 0; l < 2; ++l) {
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
         continue;
-      SndVecOpNum = Elt/NumElems;
-      SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
-      break;
+
+      if (MinRange[l][Input] >= 0 && MinRange[l][Input] < (int)NumLaneElems)
+        ExtractIdx[l][Input] = 0;
+      else if (MinRange[l][Input] >= (int)NumLaneElems &&
+               MinRange[l][Input] < (int)NumElems)
+        ExtractIdx[l][Input] = NumLaneElems;
+      else
+        return SDValue();
     }
+  }
 
-    // Extract the subvectors
-    SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
-                      DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
-    SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
-                      DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+  DebugLoc dl = SVOp->getDebugLoc();
+  MVT EltVT = VT.getVectorElementType().getSimpleVT();
+  EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
 
-    // Generate 128-bit shuffles
-    SmallVector<int, 16> MaskV1, MaskV2;
-    for (int i = 0; i < HalfSize; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
-    }
-    for (int i = HalfSize; i < NumElems; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+  SDValue Ops[2][2];
+  for (unsigned l = 0; l < 2; ++l) {
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (ExtractIdx[l][Input] >= 0)
+        Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
+                                DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
+                                                DAG, dl);
+      else
+        Ops[l][Input] = DAG.getUNDEF(NVT);
     }
+  }
 
-    EVT NVT = V1.getValueType();
-    V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
-    V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
-
-    // Concatenate the result back
-    SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
-                                   DAG.getConstant(0, MVT::i32), DAG, dl);
-    return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
-                              DAG, dl);
+  // Generate 128-bit shuffles
+  SmallVector<int, 16> Mask1, Mask2;
+  for (unsigned i = 0; i != NumLaneElems; ++i) {
+    int Elt = SVOp->getMaskElt(i);
+    if (Elt >= (int)NumElems) {
+      Elt %= NumLaneElems;
+      Elt += NumLaneElems;
+    } else if (Elt >= 0) {
+      Elt %= NumLaneElems;
+    }
+    Mask1.push_back(Elt);
+  }
+  for (unsigned i = NumLaneElems; i != NumElems; ++i) {
+    int Elt = SVOp->getMaskElt(i);
+    if (Elt >= (int)NumElems) {
+      Elt %= NumLaneElems;
+      Elt += NumLaneElems;
+    } else if (Elt >= 0) {
+      Elt %= NumLaneElems;
+    }
+    Mask2.push_back(Elt);
   }
 
-  return SDValue();
+  SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
+  SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
+
+  // Concatenate the result back
+  SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
+                                 DAG.getConstant(0, MVT::i32), DAG, dl);
+  return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
+                            DAG, dl);
 }
 
 /// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with





More information about the llvm-commits mailing list