[llvm] be19a27 - [RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. (#147598)
Tue Jul 8 18:24:53 PDT 2025
Author: Craig Topper
Date: 2025-07-08T18:24:50-07:00
New Revision: be19a27cc5b15c712186010132705296ebc74c58
URL: https://github.com/llvm/llvm-project/commit/be19a27cc5b15c712186010132705296ebc74c58
DIFF: https://github.com/llvm/llvm-project/commit/be19a27cc5b15c712186010132705296ebc74c58.diff
LOG: [RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. (#147598)
We need to use DataLayout to get the size if the element type
is a pointer.
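For context: Type::getScalarSizeInBits() returns 0 for pointer types, because a
pointer's size is only known through DataLayout. The old
`getScalarSizeInBits() / 8` computation therefore produced a stride (and
offset) of 0 whenever the interleaved vector had pointer elements. A minimal
standalone sketch of the corrected stride computation follows; it is not the
committed code, and the function and variable names are illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Compute the byte stride for a strided load/store that reads or writes one
// field out of an interleave group of `Factor` elements.
static unsigned strideInBytes(FixedVectorType *VTy, const DataLayout &DL,
                              unsigned Factor) {
  // Buggy form: VTy->getScalarSizeInBits() is 0 when the element type is a
  // pointer, so the computed stride would also be 0.
  //   unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
  // Fixed form: DataLayout knows the store size of any element type,
  // including pointers (4 bytes on RV32, 8 bytes on RV64).
  unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
  return Factor * ScalarSizeInBytes;
}

With pointer elements, factor 3 gives a stride of 3 * 4 = 12 on RV32 and
3 * 8 = 24 on RV64, matching the vlse32.v/vlse64.v strides in the new tests
below.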
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..d295a45149d3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24111,10 +24111,11 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
IRBuilder<> Builder(LI);
+ const DataLayout &DL = LI->getDataLayout();
+
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
- LI->getPointerAddressSpace(),
- LI->getDataLayout()))
+ LI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = LI->getPointerOperandType();
@@ -24124,7 +24125,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
// and there's only one element used, use a strided load instead. This
// will be equally fast, and create less vector register pressure.
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
- unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
@@ -24187,14 +24188,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
IRBuilder<> Builder(SI);
+ const DataLayout &DL = SI->getDataLayout();
auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
ShuffleVTy->getNumElements() / Factor);
if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(),
- SI->getDataLayout()))
+ SI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = SI->getPointerOperandType();
@@ -24206,7 +24207,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
// be equally fast, and create less vector register pressure.
if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
isSpreadMask(Mask, Factor, Index)) {
- unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes =
+ DL.getTypeStoreSize(ShuffleVTy->getElementType());
Value *Data = SVI->getOperand(0);
auto *DataVTy = cast<FixedVectorType>(Data->getType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5e3ae2faf1a53..041aae229288f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1662,6 +1662,25 @@ define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
ret <4 x i8> %v0
}
+define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
+; RV32-LABEL: load_factor3_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vlse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor3_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vlse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %interleaved.vec = load <12 x ptr>, ptr %ptr
+ %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ ret <4 x ptr> %v0
+}
+
define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK: # %bb.0:
@@ -1748,6 +1767,25 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
ret void
}
+define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
+; RV32-LABEL: store_factor4_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_factor4_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vsse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
+ store <16 x ptr> %v0, ptr %ptr
+ ret void
+}
+
; Negative tests
define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
@@ -1766,8 +1804,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI49_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
+; RV32-NEXT: lui a1, %hi(.LCPI51_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1842,8 +1880,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI50_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI50_0)
+; RV32-NEXT: lui a0, %hi(.LCPI52_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36