[llvm] [IA] Add masked.load/store support for shuffle (de)interleave load/store (PR #150241)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 23 08:36:39 PDT 2025


https://github.com/preames created https://github.com/llvm/llvm-project/pull/150241

This completes the basic support for masked.load and masked.store in InterleavedAccess.  The backend support was already added via the intrinsic lowering path and the common code structure (in RISCV at least).

Note that this isn't enough to enable in LV yet.  We still need support for recognizing an interleaved mask via a shufflevector in getMask.

>From 4dab891180ef0166a109a0e387be00110655384f Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 22 Jul 2025 16:08:46 -0700
Subject: [PATCH] [IA] Add masked.load/store support for shuffle (de)interleave
 load/store

This completes the basic support for masked.load and masked.store in
InterleavedAccess.  The backend support was already added via the
intrinsic lowering path and the common code structure (in RISCV at least).

Note that this isn't enough to enable in LV yet.  We still need support
for recognizing an interleaved mask via a shufflevector in getMask.
---
 llvm/lib/CodeGen/InterleavedAccessPass.cpp    | 96 ++++++++++---------
 .../rvv/fixed-vectors-interleaved-access.ll   | 31 ++++++
 2 files changed, 80 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1b691881d67dd..65565b98508c7 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
   return false;
 }
 
+static Value *getMaskOperand(IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  case Intrinsic::vp_load:
+    return II->getOperand(1);
+  case Intrinsic::masked_load:
+    return II->getOperand(2);
+  case Intrinsic::vp_store:
+    return II->getOperand(2);
+  case Intrinsic::masked_store:
+    return II->getOperand(3);
+  }
+}
+
 // Return the corresponded deinterleaved mask, or nullptr if there is no valid
 // mask.
 static Value *getMask(Value *WideMask, unsigned Factor,
@@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
   if (isa<ScalableVectorType>(Load->getType()))
     return false;
 
-  if (auto *LI = dyn_cast<LoadInst>(Load);
-      LI && !LI->isSimple())
+  auto *LI = dyn_cast<LoadInst>(Load);
+  auto *II = dyn_cast<IntrinsicInst>(Load);
+  if (!LI && !II)
+    return false;
+
+  if (LI && !LI->isSimple())
     return false;
 
   // Check if all users of this load are shufflevectors. If we encounter any
@@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
   // Holds the corresponding index for each DE-interleave shuffle.
   SmallVector<unsigned, 4> Indices;
 
-  Type *VecTy = FirstSVI->getType();
+  VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
 
   // Check if other shufflevectors are also DE-interleaved of the same type
   // and factor as the first shufflevector.
@@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
       replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
 
   Value *Mask = nullptr;
-  if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
-    Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy));
+  if (LI) {
+    LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+  } else {
+    // Check mask operand. Handle both all-true/false and interleaved mask.
+    Mask = getMask(getMaskOperand(II), Factor, VecTy);
     if (!Mask)
       return false;
-    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n");
-  } else {
-    LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+
+    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
+                      << *Load << "\n");
   }
 
   // Try to create target specific intrinsics to replace the load and
@@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
 bool InterleavedAccessImpl::lowerInterleavedStore(
     Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
   Value *StoredValue;
-  if (auto *SI = dyn_cast<StoreInst>(Store)) {
+  auto *SI = dyn_cast<StoreInst>(Store);
+  auto *II = dyn_cast<IntrinsicInst>(Store);
+  if (SI) {
     if (!SI->isSimple())
       return false;
     StoredValue = SI->getValueOperand();
-  } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
-    assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
-    StoredValue = VPStore->getArgOperand(0);
   } else {
-    llvm_unreachable("unsupported store operation");
+    assert(II->getIntrinsicID() == Intrinsic::vp_store ||
+           II->getIntrinsicID() == Intrinsic::masked_store);
+    StoredValue = II->getArgOperand(0);
   }
 
   auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
@@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
          "number of stored element should be a multiple of Factor");
 
   Value *Mask = nullptr;
-  if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+  if (SI) {
+    LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+  } else {
+    // Check mask operand. Handle both all-true/false and interleaved mask.
     unsigned LaneMaskLen = NumStoredElements / Factor;
-    Mask = getMask(VPStore->getMaskParam(), Factor,
+    Mask = getMask(getMaskOperand(II), Factor,
                    ElementCount::getFixed(LaneMaskLen));
     if (!Mask)
       return false;
 
-    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
-                      << "\n");
-
-  } else {
-    LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
+                      << *Store << "\n");
   }
 
   // Try to create target specific intrinsics to replace the store and
@@ -592,19 +615,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     assert(II);
 
     // Check mask operand. Handle both all-true/false and interleaved mask.
-    Value *WideMask;
-    switch (II->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::vp_load:
-      WideMask = II->getOperand(1);
-      break;
-    case  Intrinsic::masked_load:
-      WideMask = II->getOperand(2);
-      break;
-    }
-
-    Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+    Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
     if (!Mask)
       return false;
 
@@ -642,18 +653,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
   Value *Mask = nullptr;
   if (II) {
     // Check mask operand. Handle both all-true/false and interleaved mask.
-    Value *WideMask;
-    switch (II->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::vp_store:
-      WideMask = II->getOperand(2);
-      break;
-    case Intrinsic::masked_store:
-      WideMask = II->getOperand(3);
-      break;
-    }
-    Mask = getMask(WideMask, Factor,
+    Mask = getMask(getMaskOperand(II), Factor,
                    cast<VectorType>(InterleaveValues[0]->getType()));
     if (!Mask)
       return false;
@@ -687,11 +687,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
   using namespace PatternMatch;
   for (auto &I : instructions(F)) {
     if (match(&I, m_CombineOr(m_Load(m_Value()),
-                              m_Intrinsic<Intrinsic::vp_load>())))
+                              m_Intrinsic<Intrinsic::vp_load>())) ||
+        match(&I, m_Intrinsic<Intrinsic::masked_load>()))
       Changed |= lowerInterleavedLoad(&I, DeadInsts);
 
     if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
-                              m_Intrinsic<Intrinsic::vp_store>())))
+                              m_Intrinsic<Intrinsic::vp_store>())) ||
+        match(&I, m_Intrinsic<Intrinsic::masked_store>()))
       Changed |= lowerInterleavedStore(&I, DeadInsts);
 
     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 7274e1bb59b92..26e324c47111f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -2002,3 +2002,34 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
   %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
   ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
 }
+
+define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) {
+; CHECK-LABEL: maskedload_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison)
+  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+  %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
+  %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
+  %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
+  %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
+  %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
+  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
+}
+
+define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: maskedstore_factor2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg2e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true))
+  ret void
+}



More information about the llvm-commits mailing list