[llvm] r247511 - [X86] Moved lowerVectorShuffleWithUNPCK earlier to make reuse easier. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 12 09:03:07 PDT 2015
Author: rksimon
Date: Sat Sep 12 11:03:06 2015
New Revision: 247511
URL: http://llvm.org/viewvc/llvm-project?rev=247511&view=rev
Log:
[X86] Moved lowerVectorShuffleWithUNPCK earlier to make reuse easier. NFCI.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=247511&r1=247510&r2=247511&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Sep 12 11:03:06 2015
@@ -6670,6 +6670,52 @@ static SmallBitVector computeZeroableShu
return Zeroable;
}
+// X86 has dedicated unpack instructions that can handle specific blend
+// operations: UNPCKH and UNPCKL.
+static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+ int NumElts = VT.getVectorNumElements();
+ bool Unpckl = true;
+ bool Unpckh = true;
+ bool UnpcklSwapped = true;
+ bool UnpckhSwapped = true;
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+
+ int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
+ int HiPos = LoPos + NumEltsInLane / 2;
+ int LoPosSwapped = (LoPos + NumElts) % (NumElts * 2);
+ int HiPosSwapped = (HiPos + NumElts) % (NumElts * 2);
+
+ if (Mask[i] == -1)
+ continue;
+ if (Mask[i] != LoPos)
+ Unpckl = false;
+ if (Mask[i] != HiPos)
+ Unpckh = false;
+ if (Mask[i] != LoPosSwapped)
+ UnpcklSwapped = false;
+ if (Mask[i] != HiPosSwapped)
+ UnpckhSwapped = false;
+ if (!Unpckl && !Unpckh && !UnpcklSwapped && !UnpckhSwapped)
+ return SDValue();
+ }
+ if (Unpckl)
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ if (Unpckh)
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+ if (UnpcklSwapped)
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
+ if (UnpckhSwapped)
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
+
+ llvm_unreachable("Unexpected result of UNPCK mask analysis");
+ return SDValue();
+}
+
/// \brief Try to emit a bitmask instruction for a shuffle.
///
/// This handles cases where we can model a blend exactly as a bitmask due to
@@ -10586,52 +10632,6 @@ static SDValue lowerVectorShuffleWithPER
return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
}
-// X86 has dedicated unpack instructions that can handle specific blend
-// operations: UNPCKH and UNPCKL.
-static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
- int NumElts = VT.getVectorNumElements();
- bool Unpckl = true;
- bool Unpckh = true;
- bool UnpcklSwapped = true;
- bool UnpckhSwapped = true;
- int NumEltsInLane = 128 / VT.getScalarSizeInBits();
-
- for (int i = 0; i < NumElts ; ++i) {
- unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
-
- int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
- int HiPos = LoPos + NumEltsInLane / 2;
- int LoPosSwapped = (LoPos + NumElts) % (NumElts * 2);
- int HiPosSwapped = (HiPos + NumElts) % (NumElts * 2);
-
- if (Mask[i] == -1)
- continue;
- if (Mask[i] != LoPos)
- Unpckl = false;
- if (Mask[i] != HiPos)
- Unpckh = false;
- if (Mask[i] != LoPosSwapped)
- UnpcklSwapped = false;
- if (Mask[i] != HiPosSwapped)
- UnpckhSwapped = false;
- if (!Unpckl && !Unpckh && !UnpcklSwapped && !UnpckhSwapped)
- return SDValue();
- }
- if (Unpckl)
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
- if (Unpckh)
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
- if (UnpcklSwapped)
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
- if (UnpckhSwapped)
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
-
- llvm_unreachable("Unexpected result of UNPCK mask analysis");
- return SDValue();
-}
-
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
@@ -10643,10 +10643,9 @@ static SDValue lowerV8F64VectorShuffle(S
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- SDValue UnpckNode =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG);
- if (UnpckNode)
- return UnpckNode;
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ return Unpck;
return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
@@ -10662,10 +10661,9 @@ static SDValue lowerV16F32VectorShuffle(
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- SDValue UnpckNode =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG);
- if (UnpckNode)
- return UnpckNode;
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
+ return Unpck;
return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
@@ -10681,10 +10679,9 @@ static SDValue lowerV8I64VectorShuffle(S
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- SDValue UnpckNode =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG);
- if (UnpckNode)
- return UnpckNode;
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Unpck;
return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
@@ -10700,10 +10697,9 @@ static SDValue lowerV16I32VectorShuffle(
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- SDValue UnpckNode =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG);
- if (UnpckNode)
- return UnpckNode;
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+ return Unpck;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
@@ -11474,7 +11470,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
// If needed, look through a bitcast to get to the load.
if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
SubVec2 = SubVec2.getOperand(0);
-
+
if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
More information about the llvm-commits
mailing list