[llvm] ff8f6ab - Reapply "[RISCV] Allow undef prefix for local repeating VLA shuffle lowering (#126097)"
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 07:40:20 PST 2025
Author: Philip Reames
Date: 2025-02-12T07:40:13-08:00
New Revision: ff8f6abe20a9dfbb1de8d978f865092413d3416c
URL: https://github.com/llvm/llvm-project/commit/ff8f6abe20a9dfbb1de8d978f865092413d3416c
DIFF: https://github.com/llvm/llvm-project/commit/ff8f6abe20a9dfbb1de8d978f865092413d3416c.diff
LOG: Reapply "[RISCV] Allow undef prefix for local repeating VLA shuffle lowering (#126097)"
(With a fix to recently added code.)
Implement the first TODO from #125735, and minorly clean up the code using
the same style as the recently landed strict prefix case.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7ca8482149eb9..1dfe6f0f8900c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5338,13 +5338,17 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
/// Is this mask local (i.e. elements only move within their local span), and
/// repeating (that is, the same rearrangement is being done within each span)?
static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
- // TODO: Could improve the case where undef elements exist in the first span.
+ SmallVector<int> LowSpan(Span, -1);
for (auto [I, M] : enumerate(Mask)) {
if (M == -1)
continue;
- int ChunkLo = I - (I % Span);
- int ChunkHi = ChunkLo + Span;
- if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span])
+ if ((M / Span) != (int)(I / Span))
+ return false;
+ int SpanIdx = I % Span;
+ int Expected = M % Span;
+ if (LowSpan[SpanIdx] == -1)
+ LowSpan[SpanIdx] = Expected;
+ if (LowSpan[SpanIdx] != Expected)
return false;
}
return true;
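For illustration only, here is a standalone sketch of the new predicate
(hypothetical name isLocalRepeatingShuffleSketch, plain std::vector standing
in for ArrayRef and SmallVector, no LLVM dependencies), exercising the mask
from the new test below. The key change is that undef lanes constrain
nothing, so a fully-undef first span no longer forces a bail-out:

  #include <cassert>
  #include <vector>

  // Sketch of the predicate above; -1 marks an undef mask element.
  static bool isLocalRepeatingShuffleSketch(const std::vector<int> &Mask,
                                            int Span) {
    std::vector<int> LowSpan(Span, -1); // first defined index seen per lane
    for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
      int M = Mask[I];
      if (M == -1)
        continue; // undef lanes constrain nothing
      if (M / Span != I / Span)
        return false; // element crosses its span: not local
      int SpanIdx = I % Span;
      int Expected = M % Span;
      if (LowSpan[SpanIdx] == -1)
        LowSpan[SpanIdx] = Expected; // first defined occurrence of this lane
      if (LowSpan[SpanIdx] != Expected)
        return false; // a later span disagrees: not repeating
    }
    return true;
  }

  int main() {
    // Mask from the new test: the first span is entirely undef, and every
    // later span applies the same <1, 0> swap, so it is locally repeating.
    // (The old code rejected this because Mask[I % Span] was -1.)
    assert(isLocalRepeatingShuffleSketch({-1, -1, 3, 2, 5, 4, 7, 6}, 2));
    // A mask whose spans disagree (span 2 does not swap) is rejected.
    assert(!isLocalRepeatingShuffleSketch({-1, -1, 3, 2, 4, 5, 7, 6}, 2));
  }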
@@ -5745,12 +5749,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
SDValue Gather;
- // If we have a locally repeating mask, then we can reuse the first register
- // in the index register group for all registers within the source register
- // group. TODO: This generalizes to m2, and m4.
- const MVT M1VT = getLMUL1VT(ContainerVT);
- auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first;
- if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) {
+ if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) {
+ // If we have a locally repeating mask, then we can reuse the first
+ // register in the index register group for all registers within the
+ // source register group. TODO: This generalizes to m2, and m4.
+ const MVT M1VT = getLMUL1VT(ContainerVT);
EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
SDValue SubIndex =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
@@ -5772,12 +5775,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
SubVec, SubIdx);
}
- } else if (ContainerVT.bitsGT(M1VT) && isLowSourceShuffle(Mask, VLMAX)) {
+ } else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX)) {
// If we have a shuffle which only uses the first register in our
// source register group, we can do a linear number of m1 vrgathers
// reusing the same source register (but with different indices)
// TODO: This can be generalized for m2 or m4, or for any shuffle
// for which we can do a vslidedown followed by this expansion.
+ const MVT M1VT = getLMUL1VT(ContainerVT);
EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
auto [InnerTrueMask, InnerVL] =
getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
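And a sketch of the lowering idea itself, with plain arrays standing in for
vector registers and a hypothetical helper name. The real code builds one
ISD::EXTRACT_SUBVECTOR of the index vector plus one m1 vrgather per source
register; this model only shows why a single Span-sized index vector is
enough once the mask is known to be locally repeating:

  #include <cassert>
  #include <vector>

  // Apply a locally repeating shuffle: build the span-local index vector
  // once, then reuse it for every Span-sized chunk of the source (one m1
  // vrgather per register in the real lowering).
  static std::vector<int>
  gatherLocallyRepeating(const std::vector<int> &Src,
                         const std::vector<int> &Mask, int Span) {
    // One index per lane of a single span, from the first defined occurrence.
    std::vector<int> SubIndex(Span, 0);
    for (int I = 0, E = (int)Mask.size(); I != E; ++I)
      if (Mask[I] != -1)
        SubIndex[I % Span] = Mask[I] % Span;
    // Reuse SubIndex across every span of the source register group.
    std::vector<int> Result(Src.size());
    for (int Base = 0, E = (int)Src.size(); Base < E; Base += Span)
      for (int Lane = 0; Lane < Span; ++Lane)
        Result[Base + Lane] = Src[Base + SubIndex[Lane]];
    return Result;
  }

  int main() {
    std::vector<int> Src = {10, 11, 12, 13, 14, 15, 16, 17};
    // Same mask as the new test; the undef lanes may hold any value (here
    // they get whatever SubIndex's default of 0 produces).
    auto Res = gatherLocallyRepeating(Src, {-1, -1, 3, 2, 5, 4, 7, 6}, 2);
    assert(Res[2] == 13 && Res[3] == 12 && Res[6] == 17 && Res[7] == 16);
  }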
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index 86d8a275a9055..c9fe39685fbc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -969,11 +969,44 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) {
ret <8 x i64> %shuffle
}
-define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
-; CHECK-LABEL: shuffle_v8i64_as_i256:
+; Test case where first span has undefs
+define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i128_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v13, v9, v16
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT: vrgatherei16.vv v14, v10, v16
+; CHECK-NEXT: vrgatherei16.vv v15, v11, v16
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2:
+; ZVKB-V: # %bb.0:
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVKB-V-NEXT: vle16.v v16, (a0)
+; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16
+; ZVKB-V-NEXT: vmv4r.v v8, v12
+; ZVKB-V-NEXT: ret
+ %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 undef, i32 undef, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
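A rough reading of the new shuffle_v8i64_as_i128_2 checks above (assuming a
default VLEN=128 build, so each m1 register holds two i64 elements): the
constant pool entry .LCPI30_0 holds the single span-local index pattern, and
all four m1 vrgatherei16.vv instructions reuse it from v16 instead of
needing a full m4 index register group. A hypothetical trace of the
predicate on that mask, with Span = 2:

  // isLocalRepeatingShuffle({-1, -1, 3, 2, 5, 4, 7, 6}, /*Span=*/2):
  //   I=0,1: M == -1, skipped (previously any undef in the first span
  //          caused the whole mask to be rejected).
  //   I=2:   M=3, 3/2 == 2/2, LowSpan[0] = 3 % 2 = 1.
  //   I=3:   M=2, 2/2 == 3/2, LowSpan[1] = 2 % 2 = 0.
  //   I=4..7: spans 2 and 3 repeat the same <1, 0> swap; all checks pass.
  // => true: one index vector (v16) serves every source register.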
@@ -982,8 +1015,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
;
; ZVKB-V-LABEL: shuffle_v8i64_as_i256:
; ZVKB-V: # %bb.0:
-; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
-; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; ZVKB-V-NEXT: vle16.v v16, (a0)
; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
@@ -996,8 +1029,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) {
; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -1008,8 +1041,8 @@ define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0)
;
; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b:
; ZVKB-V: # %bb.0:
-; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
-; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI32_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI32_0)
; ZVKB-V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVKB-V-NEXT: vle16.v v12, (a0)
; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma