[llvm] r286043 - [X86][SSE] Reuse zeroable element mask in SSE4A EXTRQ/INSERTQ vector shuffle lowering. NFCI

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 5 11:05:13 PDT 2016


Author: rksimon
Date: Sat Nov  5 13:05:13 2016
New Revision: 286043

URL: http://llvm.org/viewvc/llvm-project?rev=286043&view=rev
Log:
[X86][SSE] Reuse zeroable element mask in SSE4A EXTRQ/INSERTQ vector shuffle lowering. NFCI

Don't regenerate a zeroable element mask with computeZeroableShuffleElements when its already available.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=286043&r1=286042&r2=286043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov  5 13:05:13 2016
@@ -8025,13 +8025,12 @@ static SDValue lowerVectorShuffleAsShift
 /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
 static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
                                            SDValue V2, ArrayRef<int> Mask,
+                                           const SmallBitVector &Zeroable,
                                            SelectionDAG &DAG) {
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-  assert(!Zeroable.all() && "Fully zeroable shuffle mask");
-
   int Size = Mask.size();
   int HalfSize = Size / 2;
   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+  assert(!Zeroable.all() && "Fully zeroable shuffle mask");
 
   // Upper half must be undefined.
   if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -10112,7 +10111,8 @@ static SDValue lowerV8I16VectorShuffle(c
 
   // See if we can use SSE4A Extraction / Insertion.
   if (Subtarget.hasSSE4A())
-    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
+                                                Zeroable, DAG))
       return V;
 
   // There are special ways we can lower some single-element blends.
@@ -10265,7 +10265,8 @@ static SDValue lowerV16I8VectorShuffle(c
 
   // See if we can use SSE4A Extraction / Insertion.
   if (Subtarget.hasSSE4A())
-    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
+                                                Zeroable, DAG))
       return V;
 
   int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });




More information about the llvm-commits mailing list