[llvm] be19a27 - [RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. (#147598)
Tue Jul 8 18:24:53 PDT 2025
Author: Craig Topper
Date: 2025-07-08T18:24:50-07:00
New Revision: be19a27cc5b15c712186010132705296ebc74c58
URL: https://github.com/llvm/llvm-project/commit/be19a27cc5b15c712186010132705296ebc74c58
DIFF: https://github.com/llvm/llvm-project/commit/be19a27cc5b15c712186010132705296ebc74c58.diff
LOG: [RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. (#147598)
We need to use DataLayout to get the size if the element type
is a pointer.
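For context: Type::getScalarSizeInBits() returns 0 for pointer types, because a
pointer's size is only known through DataLayout. The old
`getScalarSizeInBits() / 8` computation therefore produced a stride (and
offset) of 0 whenever the interleaved vector had pointer elements. A minimal
standalone sketch of the corrected stride computation follows; it is not the
committed code, and the function and variable names are illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Compute the byte stride for a strided load/store that reads or writes one
// field out of an interleave group of `Factor` elements.
static unsigned strideInBytes(FixedVectorType *VTy, const DataLayout &DL,
                              unsigned Factor) {
  // Buggy form: VTy->getScalarSizeInBits() is 0 when the element type is a
  // pointer, so the computed stride would also be 0.
  //   unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
  // Fixed form: DataLayout knows the store size of any element type,
  // including pointers (4 bytes on RV32, 8 bytes on RV64).
  unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
  return Factor * ScalarSizeInBytes;
}

With pointer elements, factor 3 gives a stride of 3 * 4 = 12 on RV32 and
3 * 8 = 24 on RV64, matching the vlse32.v/vlse64.v strides in the new tests
below.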
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..d295a45149d3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24111,10 +24111,11 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
IRBuilder<> Builder(LI);
+ const DataLayout &DL = LI->getDataLayout();
+
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
- LI->getPointerAddressSpace(),
- LI->getDataLayout()))
+ LI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = LI->getPointerOperandType();
@@ -24124,7 +24125,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
// and there's only one element used, use a strided load instead. This
// will be equally fast, and create less vector register pressure.
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
- unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
@@ -24187,14 +24188,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
IRBuilder<> Builder(SI);
+ const DataLayout &DL = SI->getDataLayout();
auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
ShuffleVTy->getNumElements() / Factor);
if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(),
- SI->getDataLayout()))
+ SI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = SI->getPointerOperandType();
@@ -24206,7 +24207,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
// be equally fast, and create less vector register pressure.
if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
isSpreadMask(Mask, Factor, Index)) {
- unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes =
+ DL.getTypeStoreSize(ShuffleVTy->getElementType());
Value *Data = SVI->getOperand(0);
auto *DataVTy = cast<FixedVectorType>(Data->getType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5e3ae2faf1a53..041aae229288f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1662,6 +1662,25 @@ define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
ret <4 x i8> %v0
}
+define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
+; RV32-LABEL: load_factor3_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vlse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor3_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vlse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %interleaved.vec = load <12 x ptr>, ptr %ptr
+ %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ ret <4 x ptr> %v0
+}
+
define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK: # %bb.0:
@@ -1748,6 +1767,25 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
ret void
}
+define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
+; RV32-LABEL: store_factor4_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_factor4_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vsse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
+ store <16 x ptr> %v0, ptr %ptr
+ ret void
+}
+
; Negative tests
define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
@@ -1766,8 +1804,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI49_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
+; RV32-NEXT: lui a1, %hi(.LCPI51_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1842,8 +1880,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI50_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI50_0)
+; RV32-NEXT: lui a0, %hi(.LCPI52_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36