[llvm] r277808 - [X86][SSE] Added target shuffle combine binary permute matching function. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 5 04:16:53 PDT 2016
Author: rksimon
Date: Fri Aug 5 06:16:53 2016
New Revision: 277808
URL: http://llvm.org/viewvc/llvm-project?rev=277808&view=rev
Log:
[X86][SSE] Added target shuffle combine binary permute matching function. NFCI.
Added matchBinaryPermuteVectorShuffle and moved the blend+zero and insertps matching code into it.
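For context: the core of the blend-with-zero path that the new helper centralises is the immediate computation. When the legal X86ISD::BLENDI type has more lanes than the shuffle mask, each mask element covers MaskRatio blend lanes, and every lane that maps to an undef/zeroable (negative) mask element is taken from the zero-vector operand. The following is an illustrative standalone sketch, not code from the patch; the helper name getBlendWithZeroImm is made up.

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative sketch only: build an 8-bit BLENDI immediate where each bit
// set to 1 selects the second (zero vector) operand for that lane.
static uint8_t getBlendWithZeroImm(const std::vector<int> &Mask,
                                   unsigned NumBlendElts) {
  unsigned NumMaskElts = Mask.size();
  assert(NumBlendElts <= 8 && "BLENDI immediate only covers 8 lanes");
  assert(NumBlendElts % NumMaskElts == 0 && "blend type must widen the mask");
  unsigned MaskRatio = NumBlendElts / NumMaskElts;
  uint8_t Imm = 0;
  for (unsigned i = 0; i != NumBlendElts; ++i)
    if (Mask[i / MaskRatio] < 0) // negative mask element == undef/zeroable
      Imm |= 1u << i;
  return Imm;
}

// Example: a v4i32 mask <0, -1, 2, -1> widened to a v8i16 blend yields
// getBlendWithZeroImm({0, -1, 2, -1}, 8) == 0xCC (lanes 2,3,6,7 zeroed).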
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=277808&r1=277807&r2=277808&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 5 06:16:53 2016
@@ -25005,6 +25005,71 @@ static bool matchBinaryVectorShuffle(MVT
return false;
}
+static bool matchBinaryPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
+                                            SDValue &V1, SDValue &V2,
+                                            SDLoc &DL, SelectionDAG &DAG,
+                                            const X86Subtarget &Subtarget,
+                                            unsigned &Shuffle, MVT &ShuffleVT,
+                                            unsigned &PermuteImm) {
+  unsigned NumMaskElts = Mask.size();
+
+  // Attempt to blend with zero.
+  if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && SrcVT.is128BitVector()) ||
+                           (Subtarget.hasAVX() && SrcVT.is256BitVector()))) {
+    // Determine a type compatible with X86ISD::BLENDI.
+    // TODO - add 16i16 support (requires lane duplication).
+    MVT BlendVT = SrcVT;
+    if (Subtarget.hasAVX2()) {
+      if (BlendVT == MVT::v4i64)
+        BlendVT = MVT::v8i32;
+      else if (BlendVT == MVT::v2i64)
+        BlendVT = MVT::v4i32;
+    } else {
+      if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32)
+        BlendVT = MVT::v8i16;
+      else if (BlendVT == MVT::v4i64)
+        BlendVT = MVT::v4f64;
+      else if (BlendVT == MVT::v8i32)
+        BlendVT = MVT::v8f32;
+    }
+
+    if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
+                                         /*Low*/ 0) &&
+        NumMaskElts <= BlendVT.getVectorNumElements()) {
+      unsigned BlendSize = BlendVT.getVectorNumElements();
+      unsigned MaskRatio = BlendSize / NumMaskElts;
+
+      PermuteImm = 0;
+      for (unsigned i = 0; i != BlendSize; ++i)
+        if (Mask[i / MaskRatio] < 0)
+          PermuteImm |= 1u << i;
+
+      V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
+      Shuffle = X86ISD::BLENDI;
+      ShuffleVT = BlendVT;
+      return true;
+    }
+  }
+
+  // Attempt to combine to INSERTPS.
+  if (Subtarget.hasSSE41() && NumMaskElts == 4 &&
+      (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32)) {
+    SmallBitVector Zeroable(4, false);
+    for (unsigned i = 0; i != NumMaskElts; ++i)
+      if (Mask[i] < 0)
+        Zeroable[i] = true;
+
+    if (Zeroable.any())
+      if (matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+        Shuffle = X86ISD::INSERTPS;
+        ShuffleVT = MVT::v4f32;
+        return true;
+      }
+  }
+
+  return false;
+}
+
/// \brief Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
@@ -25142,78 +25207,21 @@ static bool combineX86ShuffleChain(SDVal
return true;
}
-  // Attempt to blend with zero.
-  if (NumMaskElts <= 8 &&
-      ((Subtarget.hasSSE41() && VT.is128BitVector()) ||
-       (Subtarget.hasAVX() && VT.is256BitVector()))) {
-    // Convert VT to a type compatible with X86ISD::BLENDI.
-    // TODO - add 16i16 support (requires lane duplication).
-    MVT ShuffleVT = MaskVT;
-    if (Subtarget.hasAVX2()) {
-      if (ShuffleVT == MVT::v4i64)
-        ShuffleVT = MVT::v8i32;
-      else if (ShuffleVT == MVT::v2i64)
-        ShuffleVT = MVT::v4i32;
-    } else {
-      if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
-        ShuffleVT = MVT::v8i16;
-      else if (ShuffleVT == MVT::v4i64)
-        ShuffleVT = MVT::v4f64;
-      else if (ShuffleVT == MVT::v8i32)
-        ShuffleVT = MVT::v8f32;
-    }
-
-    if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
-                                         /*Low*/ 0) &&
-        NumMaskElts <= ShuffleVT.getVectorNumElements()) {
-      unsigned BlendMask = 0;
-      unsigned ShuffleSize = ShuffleVT.getVectorNumElements();
-      unsigned MaskRatio = ShuffleSize / NumMaskElts;
-
-      if (Depth == 1 && Root.getOpcode() == X86ISD::BLENDI)
-        return false;
-
-      for (unsigned i = 0; i != ShuffleSize; ++i)
-        if (Mask[i / MaskRatio] < 0)
-          BlendMask |= 1u << i;
-
-      SDValue Zero = getZeroVector(ShuffleVT, Subtarget, DAG, DL);
-      Res = DAG.getBitcast(ShuffleVT, Input);
-      DCI.AddToWorklist(Res.getNode());
-      Res = DAG.getNode(X86ISD::BLENDI, DL, ShuffleVT, Res, Zero,
-                        DAG.getConstant(BlendMask, DL, MVT::i8));
-      DCI.AddToWorklist(Res.getNode());
-      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
-                    /*AddTo*/ true);
-      return true;
-    }
-  }
-
-  // Attempt to combine to INSERTPS.
-  if (Subtarget.hasSSE41() && NumMaskElts == 4 &&
-      (VT == MVT::v2f64 || VT == MVT::v4f32)) {
-    SmallBitVector Zeroable(4, false);
-    for (unsigned i = 0; i != NumMaskElts; ++i)
-      if (Mask[i] < 0)
-        Zeroable[i] = true;
-
-    unsigned InsertPSMask;
-    SDValue V1 = Input, V2 = Input;
-    if (Zeroable.any() && matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask,
-                                                       Zeroable, Mask, DAG)) {
-      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTPS)
-        return false;
-      V1 = DAG.getBitcast(MVT::v4f32, V1);
-      DCI.AddToWorklist(V1.getNode());
-      V2 = DAG.getBitcast(MVT::v4f32, V2);
-      DCI.AddToWorklist(V2.getNode());
-      Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
-                        DAG.getConstant(InsertPSMask, DL, MVT::i8));
-      DCI.AddToWorklist(Res.getNode());
-      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
-                    /*AddTo*/ true);
-      return true;
-    }
+  SDValue V1 = Input, V2 = Input;
+  if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
+                                      Shuffle, ShuffleVT, PermuteImm)) {
+    if (Depth == 1 && Root.getOpcode() == Shuffle)
+      return false; // Nothing to do!
+    V1 = DAG.getBitcast(ShuffleVT, V1);
+    DCI.AddToWorklist(V1.getNode());
+    V2 = DAG.getBitcast(ShuffleVT, V2);
+    DCI.AddToWorklist(V2.getNode());
+    Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+                      DAG.getConstant(PermuteImm, DL, MVT::i8));
+    DCI.AddToWorklist(Res.getNode());
+    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                  /*AddTo*/ true);
+    return true;
}
// Don't try to re-form single instruction chains under any circumstances now
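A side note on the INSERTPS half of the new helper: matchVectorShuffleAsInsertPS fills PermuteImm with the instruction's 8-bit immediate, whose layout is fixed by SSE4.1: bits [7:6] select the source lane of the second operand, bits [5:4] the destination lane, and bits [3:0] zero out destination lanes. A minimal illustrative encoder follows; it is not the in-tree helper.

#include <cassert>
#include <cstdint>

// Illustrative only: pack an INSERTPS immediate from its three fields.
static uint8_t encodeInsertPSImm(unsigned SrcLane, unsigned DstLane,
                                 uint8_t ZeroMask) {
  assert(SrcLane < 4 && DstLane < 4 && "INSERTPS lanes are 0-3");
  return uint8_t((SrcLane << 6) | (DstLane << 4) | (ZeroMask & 0xF));
}

// Example: encodeInsertPSImm(/*SrcLane=*/2, /*DstLane=*/0, /*ZeroMask=*/0xA)
// copies lane 2 of the second operand into lane 0 of the first and zeroes
// lanes 1 and 3 of the result.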