[llvm] r286043 - [X86][SSE] Reuse zeroable element mask in SSE4A EXTRQ/INSERTQ vector shuffle lowering. NFCI
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 5 11:05:13 PDT 2016
Author: rksimon
Date: Sat Nov 5 13:05:13 2016
New Revision: 286043
URL: http://llvm.org/viewvc/llvm-project?rev=286043&view=rev
Log:
[X86][SSE] Reuse zeroable element mask in SSE4A EXTRQ/INSERTQ vector shuffle lowering. NFCI
Don't regenerate a zeroable element mask with computeZeroableShuffleElements when its already available.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=286043&r1=286042&r2=286043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 5 13:05:13 2016
@@ -8025,13 +8025,12 @@ static SDValue lowerVectorShuffleAsShift
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- assert(!Zeroable.all() && "Fully zeroable shuffle mask");
-
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+ assert(!Zeroable.all() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -10112,7 +10111,8 @@ static SDValue lowerV8I16VectorShuffle(c
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
@@ -10265,7 +10265,8 @@ static SDValue lowerV16I8VectorShuffle(c
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
More information about the llvm-commits
mailing list