[llvm] r352280 - [x86] add helper for creating a half-width shuffle; NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 26 08:20:22 PST 2019


Author: spatel
Date: Sat Jan 26 08:20:22 2019
New Revision: 352280

URL: http://llvm.org/viewvc/llvm-project?rev=352280&view=rev
Log:
[x86] add helper for creating a half-width shuffle; NFC

This reduces a bit of duplication between the combining and
lowering places that use it, but the primary motivation is
to make it easier to rearrange the lowering logic and solve
PR40434:
https://bugs.llvm.org/show_bug.cgi?id=40434

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352280&r1=352279&r2=352280&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 26 08:20:22 2019
@@ -14371,6 +14371,38 @@ getHalfShuffleMask(ArrayRef<int> Mask, M
   return true;
 }
 
+/// Given the output values from getHalfShuffleMask(), create a half width
+/// shuffle of extracted vectors followed by an insert back to full width.
+static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
+                                     ArrayRef<int> HalfMask, int HalfIdx1,
+                                     int HalfIdx2, bool UndefLower,
+                                     SelectionDAG &DAG) {
+  assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
+  assert(V1.getValueType().isSimple() && "Expecting only simple types");
+
+  MVT VT = V1.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfNumElts = NumElts / 2;
+  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+
+  auto getHalfVector = [&](int HalfIdx) {
+    if (HalfIdx < 0)
+      return DAG.getUNDEF(HalfVT);
+    SDValue V = (HalfIdx < 2 ? V1 : V2);
+    HalfIdx = (HalfIdx % 2) * HalfNumElts;
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+                       DAG.getIntPtrConstant(HalfIdx, DL));
+  };
+
+  // ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset
+  SDValue Half1 = getHalfVector(HalfIdx1);
+  SDValue Half2 = getHalfVector(HalfIdx2);
+  SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  unsigned Offset = UndefLower ? HalfNumElts : 0;
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+                     DAG.getIntPtrConstant(Offset, DL));
+}
+
 /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
 /// This allows for fast cases such as subvector extraction/insertion
 /// or shuffling smaller vector types which can lower more efficiently.
@@ -14450,21 +14482,8 @@ static SDValue lowerShuffleWithUndefHalf
   if (VT.is512BitVector() && (UndefLower || NumUpperHalves != 0))
     return SDValue();
 
-  auto GetHalfVector = [&](int HalfIdx) {
-    if (HalfIdx < 0)
-      return DAG.getUNDEF(HalfVT);
-    SDValue V = (HalfIdx < 2 ? V1 : V2);
-    HalfIdx = (HalfIdx % 2) * HalfNumElts;
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
-                       DAG.getIntPtrConstant(HalfIdx, DL));
-  };
-
-  SDValue Half1 = GetHalfVector(HalfIdx1);
-  SDValue Half2 = GetHalfVector(HalfIdx2);
-  SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
-  unsigned Offset = UndefLower ? HalfNumElts : 0;
-  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
-                     DAG.getIntPtrConstant(Offset, DL));
+  return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+                               UndefLower, DAG);
 }
 
 /// Test whether the specified input (0 or 1) is in-place blended by the
@@ -32353,22 +32372,14 @@ static SDValue narrowShuffle(ShuffleVect
       (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
     return SDValue();
 
-  // Create 4 instructions to replace the unnecessarily wide shuffle.
+  // Create a half-width shuffle to replace the unnecessarily wide shuffle.
   // The trick is knowing that all of the insert/extract are actually free
-  // subregister (zmm->ymm or ymm->xmm) ops. That leaves us with a shuffle
+  // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
   // of narrow inputs into a narrow output, and that is always cheaper than
   // the wide shuffle that we started with.
-  unsigned NumElts = Mask.size();
-  SDValue Op0 = Shuf->getOperand(0);
-  SDValue Op1 = Shuf->getOperand(1);
-  SDLoc DL(Shuf);
-  SDValue Index0 = DAG.getIntPtrConstant(0, DL);
-  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts / 2);
-  SDValue Extr0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op0, Index0);
-  SDValue Extr1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, Index0);
-  SDValue NewShuf = DAG.getVectorShuffle(HalfVT, DL, Extr0, Extr1, HalfMask);
-  SDValue UndefV = DAG.getUNDEF(VT);
-  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, UndefV, NewShuf, Index0);
+  return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
+                               Shuf->getOperand(1), HalfMask, HalfIdx1,
+                               HalfIdx2, false, DAG);
 }
 
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,




More information about the llvm-commits mailing list