[llvm] a0a904e - [RISCV] Collect shuffle mask for the lane not by createSequentialMask (#129830)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 4 23:31:18 PST 2025


Author: Jim Lin
Date: 2025-03-05T15:31:16+08:00
New Revision: a0a904e9467cfa8f980c31181caf6be402065b19

URL: https://github.com/llvm/llvm-project/commit/a0a904e9467cfa8f980c31181caf6be402065b19
DIFF: https://github.com/llvm/llvm-project/commit/a0a904e9467cfa8f980c31181caf6be402065b19.diff

LOG: [RISCV] Collect shuffle mask for the lane not by createSequentialMask (#129830)

If there are the shuffle mask <1, u, u, u, 2, u, u, u> with factor 4. we
should have the shuffle mask <1, 2> for lane 0 and <u, u> for lane 1,
and so on. Since we use createSequentialMask to create the shuffle mask,
the shuffle mask for lane 1 would be <u, 0>(dervied from <u, u+1>). This
leads to poor code generation.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 73f8de7208cbe..e59c0475a0021 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23110,12 +23110,18 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
       {VTy, SI->getPointerOperandType(), XLenTy});
 
   SmallVector<Value *, 10> Ops;
+  SmallVector<int, 16> NewShuffleMask;
 
   for (unsigned i = 0; i < Factor; i++) {
+    // Collect shuffle mask for this lane.
+    for (unsigned j = 0; j < VTy->getNumElements(); j++)
+      NewShuffleMask.push_back(Mask[i + Factor * j]);
+
     Value *Shuffle = Builder.CreateShuffleVector(
-        SVI->getOperand(0), SVI->getOperand(1),
-        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
+        SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask);
     Ops.push_back(Shuffle);
+
+    NewShuffleMask.clear();
   }
   // This VL should be OK (should be executable in one vsseg instruction,
   // potentially under larger LMULs) because we checked that the fixed vector

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 4200837227899..7cc8c0c3f2d89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1394,16 +1394,12 @@ define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
   ret void
 }
 
-; TODO: This could be a vslidedown followed by a strided store
 define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
 ; CHECK-LABEL: store_factor4_one_active_slidedown:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 1
-; CHECK-NEXT:    vslideup.vi v10, v8, 1
-; CHECK-NEXT:    vmv.v.v v11, v10
-; CHECK-NEXT:    vmv.v.v v12, v10
-; CHECK-NEXT:    vsseg4e32.v v9, (a0)
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vsseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 4,  i32 undef, i32 undef, i32 undef>
   store <16 x i32> %v0, ptr %ptr


        


More information about the llvm-commits mailing list