[llvm] [RISCV]Lower one active interleaved load to normal segmented load (PR #185602)

Liao Chunyu via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 02:18:46 PDT 2026


https://github.com/ChunyuLiao created https://github.com/llvm/llvm-project/pull/185602

  We might expect to generate the vlse intrinsic in RISCVInterleavedAccess.cpp,
  but there’s an optimization for deinterleave loads in `RISCVTargetLowering::PerformDAGCombine`.
  We can generate a normal segmented load and let DAGCombine optimize it into vlse.
    
  No regression, looking forward to more discussion.

>From d78fa6ec6ab4652578a163134dc8a76a785297f1 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Tue, 10 Mar 2026 08:10:15 +0000
Subject: [PATCH 1/2] init testcase

---
 .../rvv/fixed-vectors-interleaved-access.ll   | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 9b35860904f11..8eb632c7655a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -2465,3 +2465,25 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5_skip_fields(ptr %pt
   ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
 }
 
+define <4 x i8> @maskedload_factor5_one_active(ptr %ptr) {
+; CHECK-LABEL: maskedload_factor5_one_active:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, 1057
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 20, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    li a0, 33
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vmv.v.i v0, 10
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    ret
+ %interleaved.vec = tail call <20 x i8> @llvm.masked.load.v20i8.p0(ptr %ptr, <20 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <20 x i8> poison)
+  %v0 = shufflevector <20 x i8> %interleaved.vec, <20 x i8> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+  ret <4 x i8> %v0
+}

>From 5a2861585fd1e43be31262fa357472ab1b532a24 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Tue, 10 Mar 2026 08:35:37 +0000
Subject: [PATCH 2/2] [RISCV]Lower one active interleaved load to normal
 segmented load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We might expect to generate the vlse intrinsic in RISCVInterleavedAccess.cpp,
but there’s an optimization for deinterleave loads in RISCVTargetLowering::PerformDAGCombine.
We can generate a normal segmented load and let DAGCombine optimize it into vlse.

No regression, looking forward to more discussion.
---
 .../lib/Target/RISCV/RISCVInterleavedAccess.cpp |  4 ++--
 .../rvv/fixed-vectors-interleaved-access.ll     | 17 +++--------------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 528bbdf4c26c6..3a5f26042840f 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -216,7 +216,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
   // We only support cases where the skipped fields are the trailing ones.
   // TODO: Lower to strided load if there is only a single active field.
   unsigned MaskFactor = GapMask.popcount();
-  if (MaskFactor < 2 || !GapMask.isMask())
+  if (!GapMask.isMask())
     return false;
   IRBuilder<> Builder(Load);
 
@@ -235,7 +235,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
     return false;
 
   CallInst *SegLoad = nullptr;
-  if (MaskFactor < Factor) {
+  if (MaskFactor < Factor && MaskFactor != 1) {
     // Lower to strided segmented load.
     unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 8eb632c7655a9..82419c23bc124 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -2468,20 +2468,9 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5_skip_fields(ptr %pt
 define <4 x i8> @maskedload_factor5_one_active(ptr %ptr) {
 ; CHECK-LABEL: maskedload_factor5_one_active:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    addi a1, a1, 1057
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 20, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    li a0, 33
-; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vslidedown.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    li a1, 5
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
 ; CHECK-NEXT:    ret
  %interleaved.vec = tail call <20 x i8> @llvm.masked.load.v20i8.p0(ptr %ptr, <20 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <20 x i8> poison)
   %v0 = shufflevector <20 x i8> %interleaved.vec, <20 x i8> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>



More information about the llvm-commits mailing list