[llvm] 25e4333 - [RISCV] Lower shuffle which splats a single span (without exact VLEN) (#127108)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 17:09:31 PST 2025
Author: Philip Reames
Date: 2025-02-14T17:09:27-08:00
New Revision: 25e43334a86804c77d460493e37b57274257461a
URL: https://github.com/llvm/llvm-project/commit/25e43334a86804c77d460493e37b57274257461a
DIFF: https://github.com/llvm/llvm-project/commit/25e43334a86804c77d460493e37b57274257461a.diff
LOG: [RISCV] Lower shuffle which splats a single span (without exact VLEN) (#127108)
If we have a shuffle which repeats the same pattern of elements, all of
which come from the first register in the source register group, we can
lower this to a single vrgather at m1 to perform the element
rearrangement, and reuse that for each register in the result vector
register group.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index da04880348af6..cd441638bac9e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5352,8 +5352,24 @@ static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
/// Is this mask only using elements from the first span of the input?
static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
- return all_of(Mask,
- [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
+ return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
+}
+
+/// Return true for a mask which performs an arbitrary shuffle within the first
+/// span, and then repeats that same result across all remaining spans. Note
+/// that this doesn't check if all the inputs come from a single span!
+static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
+ SmallVector<int> LowSpan(Span, -1);
+ for (auto [I, M] : enumerate(Mask)) {
+ if (M == -1)
+ continue;
+ int SpanIdx = I % Span;
+ if (LowSpan[SpanIdx] == -1)
+ LowSpan[SpanIdx] = M;
+ if (LowSpan[SpanIdx] != M)
+ return false;
+ }
+ return true;
}
/// Try to widen element type to get a new mask value for a better permutation
@@ -5771,6 +5787,35 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
SubVec, SubIdx);
}
+ } else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX) &&
+ isSpanSplatShuffle(Mask, MinVLMAX)) {
+ // If we have a shuffle which only uses the first register in our source
+ // register group, and repeats the same index across all spans, we can
+ // use a single vrgather (and possibly some register moves).
+ // TODO: This can be generalized for m2 or m4, or for any shuffle for
+ // which we can do a linear number of shuffles to form an m1 which
+ // contains all the output elements.
+ const MVT M1VT = getLMUL1VT(ContainerVT);
+ EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
+ auto [InnerTrueMask, InnerVL] =
+ getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
+ int N = ContainerVT.getVectorMinNumElements() /
+ M1VT.getVectorMinNumElements();
+ assert(isPowerOf2_32(N) && N <= 8);
+ SDValue SubV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, V1,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue SubIndex =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
+ DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
+ Gather = DAG.getUNDEF(ContainerVT);
+ for (int i = 0; i < N; i++) {
+ SDValue SubIdx =
+ DAG.getVectorIdxConstant(M1VT.getVectorMinNumElements() * i, DL);
+ Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
+ SubVec, SubIdx);
+ }
} else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX)) {
// If we have a shuffle which only uses the first register in our
// source register group, we can do a linear number of m1 vrgathers
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index d7120b4a16938..3e31c9de61657 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1311,22 +1311,14 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: lui a2, 16
-; CHECK-NEXT: srli a1, a1, 3
+; CHECK-NEXT: lui a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a2
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v10, v9, a1
-; CHECK-NEXT: vslidedown.vx v11, v10, a1
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v13, v8, v10
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v9
-; CHECK-NEXT: vrgatherei16.vv v14, v8, v11
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v9, v11, a1
+; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v15, v8, v9
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v9
+; CHECK-NEXT: vmv.v.v v13, v12
+; CHECK-NEXT: vmv.v.v v14, v12
+; CHECK-NEXT: vmv.v.v v15, v12
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vse64.v v12, (a0)
; CHECK-NEXT: ret
@@ -1435,3 +1427,20 @@ define <4 x i16> @vmerge_3(<4 x i16> %x) {
%s = shufflevector <4 x i16> %x, <4 x i16> <i16 poison, i16 5, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 5, i32 5, i32 3>
ret <4 x i16> %s
}
+
+
+define <8 x i64> @shuffle_v8i164_span_splat(<8 x i64> %a) nounwind {
+; CHECK-LABEL: shuffle_v8i164_span_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 1
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v9
+; CHECK-NEXT: vmv.v.v v13, v12
+; CHECK-NEXT: vmv.v.v v14, v12
+; CHECK-NEXT: vmv.v.v v15, v12
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+ ret <8 x i64> %res
+}
More information about the llvm-commits
mailing list