[llvm] [RISCV][IA] Use strided load for one active deinterleaveN(load) (PR #148892)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 10:10:47 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
<details>
<summary>Changes</summary>
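This adds to the deinterleaveN intrinsic path the analogous handling we already use for the shuffle lowering: when only one of the deinterleaved results is used and the subtarget does not have optimized segment loads for the given factor, a strided load is emitted instead of a segment load.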
---
Full diff: https://github.com/llvm/llvm-project/pull/148892.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+29-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+7-4)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 39603b92cc2f7..7dac87b07e990 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -243,20 +243,44 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
assert(LI->isSimple());
IRBuilder<> Builder(LI);
- Value *FirstActive =
- *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
- VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
+ auto FirstActiveItr =
+ llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
+ VectorType *ResVTy = cast<VectorType>((*FirstActiveItr)->getType());
const DataLayout &DL = LI->getDataLayout();
-
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
LI->getPointerAddressSpace(), DL))
return false;
- Value *Return;
Type *PtrTy = LI->getPointerOperandType();
Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+ // If the segment load is going to be performed segment at a time anyways
+ // and there's only one element used, use a strided load instead. This
+ // will be equally fast, and create less vector register pressure.
+ if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
+ 1 == llvm::count_if(DeinterleaveValues,
+ [](Value *V) { return V != nullptr; })) {
+ unsigned Idx = std::distance(DeinterleaveValues.begin(), FirstActiveItr);
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(ResVTy->getElementType());
+ Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
+ Value *Offset = ConstantInt::get(XLenTy, Idx * ScalarSizeInBytes);
+ Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
+ Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
+ Type *I32 = Type::getIntNTy(LI->getContext(), 32);
+ Value *VL = Builder.CreateElementCount(I32, ResVTy->getElementCount());
+
+ CallInst *CI =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
+ {ResVTy, BasePtr->getType(), Stride->getType()},
+ {BasePtr, Stride, Mask, VL});
+ Align A = commonAlignment(LI->getAlign(), Idx * ScalarSizeInBytes);
+ CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), A));
+ (*FirstActiveItr)->replaceAllUsesWith(CI);
+ return true;
+ }
+
+ Value *Return;
if (isa<FixedVectorType>(ResVTy)) {
Value *VL = Builder.CreateElementCount(XLenTy, ResVTy->getElementCount());
Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 9af92aa995f1f..e28428224c2ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -407,8 +407,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -419,8 +420,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: addi a0, a0, 3
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
``````````
</details>
https://github.com/llvm/llvm-project/pull/148892
More information about the llvm-commits mailing list