[llvm] r218399 - [x86] Factor out the logic to generically decompose a vector shuffle
Chandler Carruth
chandlerc at gmail.com
Wed Sep 24 11:20:09 PDT 2014
Author: chandlerc
Date: Wed Sep 24 13:20:09 2014
New Revision: 218399
URL: http://llvm.org/viewvc/llvm-project?rev=218399&view=rev
Log:
[x86] Factor out the logic to generically decompose a vector shuffle
into unblended shuffles and a blend.
This is the consistent fallback for the lowering paths that have fast
blend operations available, and it's getting quite repetitive.
No functionality changed.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=218399&r1=218398&r2=218399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Sep 24 13:20:09 2014
@@ -7317,6 +7317,36 @@ static SDValue lowerVectorShuffleAsBlend
}
}
+/// \brief Generic routine to lower a shuffle and blend as a decomposed set of
+/// unblended shuffles followed by an unshuffled blend.
+///
+/// This matches the extremely common pattern for handling combined
+/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
+/// operations.
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
+ SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // Shuffle the input elements into the desired positions in V1 and V2 and
+ // blend them together.
+ SmallVector<int, 32> V1Mask(Mask.size(), -1);
+ SmallVector<int, 32> V2Mask(Mask.size(), -1);
+ SmallVector<int, 32> BlendMask(Mask.size(), -1);
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= 0 && Mask[i] < Size) {
+ V1Mask[i] = Mask[i];
+ BlendMask[i] = i;
+ } else if (Mask[i] >= Size) {
+ V2Mask[i] = Mask[i] - Size;
+ BlendMask[i] = i + Size;
+ }
+
+ V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+ return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+}
+
/// \brief Try to lower a vector shuffle as a byte rotation.
///
/// We have a generic PALIGNR instruction in x86 that will do an arbitrary
@@ -9362,26 +9392,9 @@ static SDValue lowerV4F64VectorShuffle(S
DAG.getConstant(SHUFPDMask, MVT::i8));
}
- // Shuffle the input elements into the desired positions in V1 and V2 and
- // blend them together.
- int V1Mask[] = {-1, -1, -1, -1};
- int V2Mask[] = {-1, -1, -1, -1};
- for (int i = 0; i < 4; ++i)
- if (Mask[i] >= 0 && Mask[i] < 4)
- V1Mask[i] = Mask[i];
- else if (Mask[i] >= 4)
- V2Mask[i] = Mask[i] - 4;
-
- V1 = DAG.getVectorShuffle(MVT::v4f64, DL, V1, DAG.getUNDEF(MVT::v4f64), V1Mask);
- V2 = DAG.getVectorShuffle(MVT::v4f64, DL, V2, DAG.getUNDEF(MVT::v4f64), V2Mask);
-
- unsigned BlendMask = 0;
- for (int i = 0; i < 4; ++i)
- if (Mask[i] >= 4)
- BlendMask |= 1 << i;
-
- return DAG.getNode(X86ISD::BLENDI, DL, MVT::v4f64, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8));
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
+ Mask, DAG);
}
/// \brief Handle lowering of 4-lane 64-bit integer shuffles.
@@ -9434,23 +9447,9 @@ static SDValue lowerV4I64VectorShuffle(S
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
- // Shuffle the input elements into the desired positions in V1 and V2 and
- // blend them together.
- int V1Mask[] = {-1, -1, -1, -1};
- int V2Mask[] = {-1, -1, -1, -1};
- int BlendMask[] = {-1, -1, -1, -1};
- for (int i = 0; i < 4; ++i)
- if (Mask[i] >= 0 && Mask[i] < 4) {
- V1Mask[i] = Mask[i];
- BlendMask[i] = i;
- } else if (Mask[i] >= 4) {
- V2Mask[i] = Mask[i] - 4;
- BlendMask[i] = i + 4;
- }
-
- V1 = DAG.getVectorShuffle(MVT::v4i64, DL, V1, DAG.getUNDEF(MVT::v4i64), V1Mask);
- V2 = DAG.getVectorShuffle(MVT::v4i64, DL, V2, DAG.getUNDEF(MVT::v4i64), V2Mask);
- return DAG.getVectorShuffle(MVT::v4i64, DL, V1, V2, BlendMask);
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
+ Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit floating point shuffles.
@@ -9509,24 +9508,9 @@ static SDValue lowerV8F32VectorShuffle(S
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
}
- // Shuffle the input elements into the desired positions in V1 and V2 and
- // blend them together.
- int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int V2Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- unsigned BlendMask = 0;
- for (int i = 0; i < 8; ++i)
- if (Mask[i] >= 0 && Mask[i] < 8) {
- V1Mask[i] = Mask[i];
- } else if (Mask[i] >= 8) {
- V2Mask[i] = Mask[i] - 8;
- BlendMask |= 1 << i;
- }
-
- V1 = DAG.getVectorShuffle(MVT::v8f32, DL, V1, DAG.getUNDEF(MVT::v8f32), V1Mask);
- V2 = DAG.getVectorShuffle(MVT::v8f32, DL, V2, DAG.getUNDEF(MVT::v8f32), V2Mask);
-
- return DAG.getNode(X86ISD::BLENDI, DL, MVT::v8f32, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8));
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
+ Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit integer shuffles.
@@ -9577,23 +9561,9 @@ static SDValue lowerV8I32VectorShuffle(S
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
}
- // Shuffle the input elements into the desired positions in V1 and V2 and
- // blend them together.
- int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int V2Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int BlendMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- for (int i = 0; i < 8; ++i)
- if (Mask[i] >= 0 && Mask[i] < 8) {
- V1Mask[i] = Mask[i];
- BlendMask[i] = i;
- } else if (Mask[i] >= 8) {
- V2Mask[i] = Mask[i] - 8;
- BlendMask[i] = i + 8;
- }
-
- V1 = DAG.getVectorShuffle(MVT::v8i32, DL, V1, DAG.getUNDEF(MVT::v8i32), V1Mask);
- V2 = DAG.getVectorShuffle(MVT::v8i32, DL, V2, DAG.getUNDEF(MVT::v8i32), V2Mask);
- return DAG.getVectorShuffle(MVT::v8i32, DL, V1, V2, BlendMask);
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
+ Mask, DAG);
}
/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
More information about the llvm-commits
mailing list