[llvm] [RISCV] Allow undef prefix for local repeating VLA shuffle lowering (PR #126097)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 09:40:24 PST 2025


https://github.com/preames created https://github.com/llvm/llvm-project/pull/126097

Implement the first TODO from #125735, and apply a minor cleanup to the code, using the same style as the recently landed strict-prefix case.
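
For readers skimming the patch: below is a minimal standalone sketch of the generalized predicate (the function name, the std::vector signature, and the main() driver are illustrative, not the LLVM code). Instead of comparing each element against the corresponding element of the first span (which spuriously fails when that element is undef), the check records the first defined index seen at each position-within-span and requires every later chunk to agree with it.

#include <cassert>
#include <vector>

// Sketch of the generalized check: the mask is "local" (each element
// stays within its Span-sized chunk) and "repeating" (every chunk
// applies the same rearrangement); -1 is undef and matches anything.
static bool isLocalRepeatingShuffleSketch(const std::vector<int> &Mask,
                                          int Span) {
  std::vector<int> LowSpan(Span, -1); // first defined index per span slot
  for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
    int M = Mask[I];
    if (M == -1)
      continue;
    int SpanIdx = I % Span;
    int ChunkLo = I - SpanIdx;
    if (M < ChunkLo || M >= ChunkLo + Span) // escapes its chunk: not local
      return false;
    int Expected = M - ChunkLo;
    if (LowSpan[SpanIdx] == -1) // first defined element at this slot
      LowSpan[SpanIdx] = Expected;
    if (LowSpan[SpanIdx] != Expected) // disagrees with an earlier chunk
      return false;
  }
  return true;
}

int main() {
  // The mask from the new test below: the first span is entirely undef
  // and every later span swaps adjacent pairs.  Accepted after this
  // patch; the old comparison against Mask[I % Span] rejected it since
  // Mask[0] and Mask[1] are -1.
  assert(isLocalRepeatingShuffleSketch({-1, -1, 3, 2, 5, 4, 7, 6}, 2));
  // A non-repeating mask still fails: chunk 0 swaps, chunk 1 does not.
  assert(!isLocalRepeatingShuffleSketch({1, 0, 2, 3, 5, 4, 7, 6}, 2));
}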

From ab283cbe261f3627c79b62de2611675999bb2d3d Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 6 Feb 2025 09:33:39 -0800
Subject: [PATCH] [RISCV] Allow undef prefix for local repeating VLA shuffle
 lowering

Implement the first TODO from #125735, and apply a minor cleanup to the
code, using the same style as the recently landed strict-prefix case.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 17 ++++---
 .../RISCV/rvv/fixed-vectors-shuffle-rotate.ll | 49 ++++++++++++++++---
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 165c71d8e03f163..ec74523a31227b0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5338,13 +5338,19 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
 /// Is this mask local (i.e. elements only move within their local span), and
 /// repeating (that is, the same rearrangement is being done within each span)?
 static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
-  // TODO: Could improve the case where undef elements exist in the first span.
+  SmallVector<int> LowSpan(Span, -1);
   for (auto [I, M] : enumerate(Mask)) {
     if (M == -1)
       continue;
-    int ChunkLo = I - (I % Span);
+    int SpanIdx = I % Span;
+    int ChunkLo = I - SpanIdx;
     int ChunkHi = ChunkLo + Span;
-    if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span])
+    if (M < ChunkLo || M >= ChunkHi)
+      return false;
+    int Expected = M - ChunkLo;
+    if (LowSpan[SpanIdx] == -1)
+      LowSpan[SpanIdx] = Expected;
+    if (LowSpan[SpanIdx] != Expected)
       return false;
   }
   return true;
@@ -5742,9 +5748,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     // If we have a locally repeating mask, then we can reuse the first register
     // in the index register group for all registers within the source register
     // group.  TODO: This generalizes to m2, and m4.
-    const MVT M1VT = getLMUL1VT(ContainerVT);
-    auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first;
-    if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) {
+    if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) {
+      const MVT M1VT = getLMUL1VT(ContainerVT);
       EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
       SDValue SubIndex =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index 86d8a275a905508..c9fe39685fbc6fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -969,11 +969,44 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) {
   ret <8 x i64> %shuffle
 }
 
-define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
-; CHECK-LABEL: shuffle_v8i64_as_i256:
+; Test case where first span has undefs
+define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i128_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI30_0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v13, v9, v16
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT:    vrgatherei16.vv v14, v10, v16
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v16
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2:
+; ZVKB-V:       # %bb.0:
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI30_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVKB-V-NEXT:    vle16.v v16, (a0)
+; ZVKB-V-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZVKB-V-NEXT:    vrgatherei16.vv v13, v9, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v12, v8, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v14, v10, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v15, v11, v16
+; ZVKB-V-NEXT:    vmv4r.v v8, v12
+; ZVKB-V-NEXT:    ret
+  %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 undef, i32 undef, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI31_0)
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
@@ -982,8 +1015,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
 ;
 ; ZVKB-V-LABEL: shuffle_v8i64_as_i256:
 ; ZVKB-V:       # %bb.0:
-; ZVKB-V-NEXT:    lui a0, %hi(.LCPI30_0)
-; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI31_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI31_0)
 ; ZVKB-V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; ZVKB-V-NEXT:    vle16.v v16, (a0)
 ; ZVKB-V-NEXT:    vrgatherei16.vv v12, v8, v16
@@ -996,8 +1029,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
 define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) {
 ; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI31_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI32_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI32_0)
 ; CHECK-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
@@ -1008,8 +1041,8 @@ define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0)
 ;
 ; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b:
 ; ZVKB-V:       # %bb.0:
-; ZVKB-V-NEXT:    lui a0, %hi(.LCPI31_0)
-; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI31_0)
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI32_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI32_0)
 ; ZVKB-V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVKB-V-NEXT:    vle16.v v12, (a0)
 ; ZVKB-V-NEXT:    vsetvli a0, zero, e64, m1, ta, ma

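One closing note on the test deltas: the span passed to isLocalRepeatingShuffle is the number of elements guaranteed to fit in a single m1 register, i.e. roughly minimum-VLEN / SEW. A back-of-the-envelope sketch (the helper name is hypothetical, not an LLVM API; requires C++17):

// Hypothetical helper, not the LLVM API: elements guaranteed per m1
// register, which is the span used by the locally-repeating check.
constexpr int minVLMaxPerRegister(int MinVLenBits, int SEWBits) {
  return MinVLenBits / SEWBits;
}
static_assert(minVLMaxPerRegister(128, 64) == 2); // default zvl128b: i64 spans of two
static_assert(minVLMaxPerRegister(256, 64) == 4); // vscale_range(4,0): spans of four

So the i128 swap masks (including the new undef-prefix variant) are locally repeating at the default zvl128b, span 2, while the i256 swap only qualifies under vscale_range(4,0), span 4; that is why shuffle_v8i64_as_i256_zvl256b gathers each m1 register with a shared index vector while plain shuffle_v8i64_as_i256 keeps its single m4 gather.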

