[llvm] [LoongArch] lower vector shuffle as byte rotate (if possible) (PR #135157)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 00:02:48 PDT 2025
================
@@ -696,6 +696,140 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
}
}
+/// Test whether a shuffle mask is equivalent within each sub-lane.
+///
+/// On success, \p RepeatedMask holds one entry per element of a lane: -1 where
+/// every lane left that slot undef, otherwise the in-lane source index, with
+/// entries from the second vector remapped to [LaneSize, 2*LaneSize). Returns
+/// false (with \p RepeatedMask partially filled) if a lane is crossed or lanes disagree.
+static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
+ ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); // Elements per lane.
+ RepeatedMask.assign(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ assert(Mask[i] == -1 || Mask[i] >= 0);
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // Source lane (index taken modulo Size) differs from destination lane: cannot model.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ // Adjust second vector indices to start at LaneSize instead of Size.
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
+ if (RepeatedMask[i % LaneSize] < 0)
+ // This is the first non-undef entry seen for this slot of a lane.
+ RepeatedMask[i % LaneSize] = LocalM;
+ else if (RepeatedMask[i % LaneSize] != LocalM)
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
+
+/// Attempts to match vector shuffle as byte rotation.
+static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+
+ SDValue Lo, Hi;
+ SmallVector<int, 16> RepeatedMask;
+
+ if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
+ return -1;
+
+ int NumElts = RepeatedMask.size();
----------------
heiher wrote:
If this code is likely to be reused in future optimizations, it's better to keep it as a separate function — much like how `matchShuffleAsElementRotate()` is structured on x86.
https://github.com/llvm/llvm-project/pull/135157
More information about the llvm-commits mailing list