[llvm] b424055 - [X86] lowerShuffleAsRepeatedMaskAndLanePermute - move the sublane split code into a lambda helper. NFC.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 08:06:01 PDT 2022
Author: Simon Pilgrim
Date: 2022-04-29T16:03:50+01:00
New Revision: b424055b52a52c0a2ae8eb08de1460f7cfb4fb43
URL: https://github.com/llvm/llvm-project/commit/b424055b52a52c0a2ae8eb08de1460f7cfb4fb43
DIFF: https://github.com/llvm/llvm-project/commit/b424055b52a52c0a2ae8eb08de1460f7cfb4fb43.diff
LOG: [X86] lowerShuffleAsRepeatedMaskAndLanePermute - move the sublane split code into a lambda helper. NFC.
This is an NFC cleanup as part of the work on #55066 - the idea being that we will be able to check for multiple sub-lane scales.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4689d9d14fb5..ec14ee9be192 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17289,116 +17289,124 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
if (is128BitLaneRepeatedShuffleMask(VT, Mask))
return SDValue();
- // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
- // (with PERMQ/PERMPD). On AVX512BW targets, permuting 32-bit sub-lanes, even
- // with a variable shuffle, is worth it for 64xi8 vectors. Otherwise we can
- // only permute whole 128-bit lanes.
- int SubLaneScale = 1;
- if (Subtarget.hasAVX2() && VT.is256BitVector())
- SubLaneScale = 2;
- if (Subtarget.hasBWI() && VT == MVT::v64i8)
- SubLaneScale = 4;
- int NumSubLanes = NumLanes * SubLaneScale;
- int NumSubLaneElts = NumLaneElts / SubLaneScale;
-
- // Check that all the sources are coming from the same lane and see if we can
- // form a repeating shuffle mask (local to each sub-lane). At the same time,
- // determine the source sub-lane for each destination sub-lane.
- int TopSrcSubLane = -1;
- SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
- SmallVector<SmallVector<int, 8>> RepeatedSubLaneMasks(
- SubLaneScale,
- SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef));
-
- for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
- // Extract the sub-lane mask, check that it all comes from the same lane
- // and normalize the mask entries to come from the first lane.
- int SrcLane = -1;
- SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
- for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
- int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
- if (M < 0)
+ // Helper to look for repeated mask in each split sublane, and that those
+ // sublanes can then be permuted into place.
+ auto ShuffleSubLanes = [&](int SubLaneScale) {
+ int NumSubLanes = NumLanes * SubLaneScale;
+ int NumSubLaneElts = NumLaneElts / SubLaneScale;
+
+ // Check that all the sources are coming from the same lane and see if we
+ // can form a repeating shuffle mask (local to each sub-lane). At the same
+ // time, determine the source sub-lane for each destination sub-lane.
+ int TopSrcSubLane = -1;
+ SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
+ SmallVector<SmallVector<int, 8>> RepeatedSubLaneMasks(
+ SubLaneScale,
+ SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef));
+
+ for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
+ // Extract the sub-lane mask, check that it all comes from the same lane
+ // and normalize the mask entries to come from the first lane.
+ int SrcLane = -1;
+ SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
+ for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
+ int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
+ if (M < 0)
+ continue;
+ int Lane = (M % NumElts) / NumLaneElts;
+ if ((0 <= SrcLane) && (SrcLane != Lane))
+ return SDValue();
+ SrcLane = Lane;
+ int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
+ SubLaneMask[Elt] = LocalM;
+ }
+
+ // Whole sub-lane is UNDEF.
+ if (SrcLane < 0)
continue;
- int Lane = (M % NumElts) / NumLaneElts;
- if ((0 <= SrcLane) && (SrcLane != Lane))
- return SDValue();
- SrcLane = Lane;
- int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
- SubLaneMask[Elt] = LocalM;
- }
- // Whole sub-lane is UNDEF.
- if (SrcLane < 0)
- continue;
+ // Attempt to match against the candidate repeated sub-lane masks.
+ for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
+ auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
+ for (int i = 0; i != NumSubLaneElts; ++i) {
+ if (M1[i] < 0 || M2[i] < 0)
+ continue;
+ if (M1[i] != M2[i])
+ return false;
+ }
+ return true;
+ };
- // Attempt to match against the candidate repeated sub-lane masks.
- for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
- auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
+ auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
+ if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
+ continue;
+
+ // Merge the sub-lane mask into the matching repeated sub-lane mask.
for (int i = 0; i != NumSubLaneElts; ++i) {
- if (M1[i] < 0 || M2[i] < 0)
+ int M = SubLaneMask[i];
+ if (M < 0)
continue;
- if (M1[i] != M2[i])
- return false;
+ assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
+ "Unexpected mask element");
+ RepeatedSubLaneMask[i] = M;
}
- return true;
- };
- auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
- if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
- continue;
+ // Track the top most source sub-lane - by setting the remaining to
+ // UNDEF we can greatly simplify shuffle matching.
+ int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
+ TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
+ Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
+ break;
+ }
+
+ // Bail if we failed to find a matching repeated sub-lane mask.
+ if (Dst2SrcSubLanes[DstSubLane] < 0)
+ return SDValue();
+ }
+ assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
+ "Unexpected source lane");
- // Merge the sub-lane mask into the matching repeated sub-lane mask.
- for (int i = 0; i != NumSubLaneElts; ++i) {
- int M = SubLaneMask[i];
+ // Create a repeating shuffle mask for the entire vector.
+ SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
+ for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
+ int Lane = SubLane / SubLaneScale;
+ auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
+ for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
+ int M = RepeatedSubLaneMask[Elt];
if (M < 0)
continue;
- assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
- "Unexpected mask element");
- RepeatedSubLaneMask[i] = M;
+ int Idx = (SubLane * NumSubLaneElts) + Elt;
+ RepeatedMask[Idx] = M + (Lane * NumLaneElts);
}
-
- // Track the top most source sub-lane - by setting the remaining to UNDEF
- // we can greatly simplify shuffle matching.
- int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
- TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
- Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
- break;
}
+ SDValue RepeatedShuffle =
+ DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
- // Bail if we failed to find a matching repeated sub-lane mask.
- if (Dst2SrcSubLanes[DstSubLane] < 0)
- return SDValue();
- }
- assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
- "Unexpected source lane");
-
- // Create a repeating shuffle mask for the entire vector.
- SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
- for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
- int Lane = SubLane / SubLaneScale;
- auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
- for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
- int M = RepeatedSubLaneMask[Elt];
- if (M < 0)
+ // Shuffle each source sub-lane to its destination.
+ SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
+ for (int i = 0; i != NumElts; i += NumSubLaneElts) {
+ int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
+ if (SrcSubLane < 0)
continue;
- int Idx = (SubLane * NumSubLaneElts) + Elt;
- RepeatedMask[Idx] = M + (Lane * NumLaneElts);
+ for (int j = 0; j != NumSubLaneElts; ++j)
+ SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
}
- }
- SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
- // Shuffle each source sub-lane to its destination.
- SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
- for (int i = 0; i != NumElts; i += NumSubLaneElts) {
- int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
- if (SrcSubLane < 0)
- continue;
- for (int j = 0; j != NumSubLaneElts; ++j)
- SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
- }
+ return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
+ SubLaneMask);
+ };
+
+ // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
+ // (with PERMQ/PERMPD). On AVX512BW targets, permuting 32-bit sub-lanes, even
+ // with a variable shuffle, is worth it for 64xi8 vectors. Otherwise we can
+ // only permute whole 128-bit lanes.
+ int SubLaneScale = 1;
+ if (Subtarget.hasAVX2() && VT.is256BitVector())
+ SubLaneScale = 2;
+ if (Subtarget.hasBWI() && VT == MVT::v64i8)
+ SubLaneScale = 4;
- return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
- SubLaneMask);
+ return ShuffleSubLanes(SubLaneScale);
}
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
More information about the llvm-commits
mailing list