[llvm] r286045 - [X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 5 11:31:57 PDT 2016
Author: rksimon
Date: Sat Nov 5 13:31:57 2016
New Revision: 286045
URL: http://llvm.org/viewvc/llvm-project?rev=286045&view=rev
Log:
[X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI
Don't regenerate a zeroable element mask with computeZeroableShuffleElements when it's already available.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=286045&r1=286044&r2=286045&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 5 13:31:57 2016
@@ -7558,12 +7558,12 @@ static SDValue lowerVectorShuffleAsBitBl
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
SmallVector<int, 8> Mask(Original.begin(), Original.end());
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
bool ForceV1Zero = false, ForceV2Zero = false;
// Attempt to generate the binary blend mask. If an input is zero then
@@ -9047,6 +9047,7 @@ static SDValue lowerVectorShuffleAsPermu
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9108,7 +9109,7 @@ static SDValue lowerV2F64VectorShuffle(c
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9201,7 +9202,7 @@ static SDValue lowerV2I64VectorShuffle(c
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9396,7 +9397,7 @@ static SDValue lowerV4F32VectorShuffle(c
if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
@@ -9485,7 +9486,7 @@ static SDValue lowerV4I32VectorShuffle(c
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
@@ -10125,7 +10126,7 @@ static SDValue lowerV8I16VectorShuffle(c
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
@@ -10402,8 +10403,8 @@ static SDValue lowerV16I8VectorShuffle(c
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Blend = lowerVectorShuffleAsBlend(
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
@@ -10536,7 +10537,7 @@ static SDValue lower128BitVectorShuffle(
case MVT::v2i64:
return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
- return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
@@ -10841,6 +10842,7 @@ static SDValue lowerVectorShuffleAsLaneP
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// TODO: If minimizing size and one of the inputs is a zero vector and the
@@ -10849,7 +10851,7 @@ static SDValue lowerV2X128VectorShuffle(
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
@@ -11359,6 +11361,7 @@ static SDValue lowerVectorShuffleWithPER
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11369,7 +11372,7 @@ static SDValue lowerV4F64VectorShuffle(c
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
@@ -11413,7 +11416,7 @@ static SDValue lowerV4F64VectorShuffle(c
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
@@ -11464,11 +11467,11 @@ static SDValue lowerV4I64VectorShuffle(c
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11530,6 +11533,7 @@ static SDValue lowerV4I64VectorShuffle(c
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11538,7 +11542,7 @@ static SDValue lowerV8F32VectorShuffle(c
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11632,7 +11636,7 @@ static SDValue lowerV8I32VectorShuffle(c
return ZExt;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11717,7 +11721,7 @@ static SDValue lowerV16I16VectorShuffle(
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -11803,7 +11807,7 @@ static SDValue lowerV32I8VectorShuffle(c
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -11900,11 +11904,11 @@ static SDValue lower256BitVectorShuffle(
switch (VT.SimpleTy) {
case MVT::v4f64:
- return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
- return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
More information about the llvm-commits
mailing list