[llvm] [RISCV] Prefer strided store for interleave store with one lane active (PR #119027)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 15:16:50 PST 2024
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/119027
>From b077d05e7e225de5d4975ff116f29bc9b5dc346d Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 6 Dec 2024 12:18:09 -0800
Subject: [PATCH 1/2] [RISCV] Prefer strided store for interleave store with
one lane active
If we're performing a segment store and all but one of the segments are
undefined, that's equivalent to performing a strided store of the one
active segment.
This is the store side of a905203b. As before, this only covers
fixed vectors.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 +++++++++-
.../rvv/fixed-vectors-interleaved-access.ll | 52 ++++++++++++++++---
2 files changed, 71 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 83308682835394..743d68523bff76 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21906,6 +21906,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
IRBuilder<> Builder(SI);
+ auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
@@ -21917,11 +21918,35 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+ unsigned Index;
+ // If the segment store only has one active lane (i.e. the interleave is
+ // just a spread shuffle), we can use a strided store instead. This will
+ // be equally fast, and create less vector register pressure.
+ if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
+ isSpreadMask(Mask, Factor, Index)) {
+ unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
+ Value *Data = SVI->getOperand(0);
+ auto *DataVTy = cast<FixedVectorType>(Data->getType());
+ Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
+ Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
+ Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
+ Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
+ Value *VL = Builder.getInt32(VTy->getNumElements());
+
+ CallInst *CI =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+ {Data->getType(), BasePtr->getType(), Stride->getType()},
+ {Data, BasePtr, Stride, Mask, VL});
+ CI->addParamAttr(
+ 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
+
+ return true;
+ }
+
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
SI->getModule(), FixedVssegIntrIds[Factor - 2],
{VTy, SI->getPointerOperandType(), XLenTy});
- auto Mask = SVI->getShuffleMask();
SmallVector<Value *, 10> Ops;
for (unsigned i = 0; i < Factor; i++) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5649ee20a47092..8833634be1a0ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1285,17 +1285,55 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
ret void
}
-; TODO: This should be a strided store
-define void @store_factor4_one_active_storeback(ptr %ptr, <4 x i32> %v) {
-; CHECK-LABEL: store_factor4_one_active_storeback:
+define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
+; CHECK-LABEL: store_factor4_one_active:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmv.v.v v11, v9
-; CHECK-NEXT: vsseg4e32.v v8, (a0)
+; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
%v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
store <16 x i32> %v0, ptr %ptr
ret void
}
+
+define void @store_factor4_one_active_idx1(ptr %ptr, <4 x i32> %v) {
+; CHECK-LABEL: store_factor4_one_active_idx1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 4
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
+ store <16 x i32> %v0, ptr %ptr
+ ret void
+}
+
+define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
+; CHECK-LABEL: store_factor4_one_active_fullwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
+ store <16 x i32> %v0, ptr %ptr
+ ret void
+}
+
+; TODO: This could be a vslidedown followed by a strided store
+define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
+; CHECK-LABEL: store_factor4_one_active_slidedown:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vmv.v.v v11, v10
+; CHECK-NEXT: vmv.v.v v12, v10
+; CHECK-NEXT: vsseg4e32.v v9, (a0)
+; CHECK-NEXT: ret
+ %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
+ store <16 x i32> %v0, ptr %ptr
+ ret void
+}
>From a70e68ef1c9726cc28ec03d804e88b0e0fa33bcc Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 6 Dec 2024 15:12:53 -0800
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 743d68523bff76..5cd3a3f1c32e66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21933,10 +21933,10 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
Value *VL = Builder.getInt32(VTy->getNumElements());
- CallInst *CI =
- Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
- {Data->getType(), BasePtr->getType(), Stride->getType()},
- {Data, BasePtr, Stride, Mask, VL});
+ CallInst *CI = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vp_strided_store,
+ {Data->getType(), BasePtr->getType(), Stride->getType()},
+ {Data, BasePtr, Stride, Mask, VL});
CI->addParamAttr(
1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
More information about the llvm-commits mailing list