[llvm] [IA] Use a single callback for lowerInterleaveIntrinsic [nfc] (#148978) (PR #149168)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 12:05:47 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Philip Reames (preames)
Changes:
This continues in the direction started by commit 4b81dc7. It essentially merges the handling for vp.store - currently in lowerInterleavedVPStore, which is shared between the shuffle-based and intrinsic-based interleave paths - into the existing dedicated routine.
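To make the new contract concrete, here is a minimal sketch (not part of the patch) of how a target override of the unified hook might dispatch on the two supported store kinds. `MyTargetLowering` and `lowerToSegmentedStore` are hypothetical names; the signature matches the diff below.

```cpp
// Minimal sketch of a target override of the unified hook. MyTargetLowering
// and lowerToSegmentedStore are hypothetical; only the signature is from the
// patch. Assumes llvm/IR/Instructions.h and llvm/IR/IntrinsicInst.h.
bool MyTargetLowering::lowerInterleaveIntrinsicToStore(
    Instruction *Store, Value *Mask,
    ArrayRef<Value *> InterleaveValues) const {
  if (auto *SI = dyn_cast<StoreInst>(Store)) {
    // Plain stores arrive unconditional: the pass passes a null mask.
    assert(!Mask && "unexpected mask on plain store");
    return lowerToSegmentedStore(SI->getPointerOperand(), /*Mask=*/nullptr,
                                 InterleaveValues);
  }
  // Otherwise this is a vp.store; the pass has already narrowed the wide
  // mask down to a single per-segment mask.
  auto *VPStore = cast<VPIntrinsic>(Store);
  assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
  return lowerToSegmentedStore(VPStore->getMemoryPointerParam(), Mask,
                               InterleaveValues);
}
```

Targets that only handle plain stores, as AArch64 does in this patch, can simply return false for the vp.store case instead.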
---
Full diff: https://github.com/llvm/llvm-project/pull/149168.diff
6 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+6-2)
- (modified) llvm/lib/CodeGen/InterleavedAccessPass.cpp (+8-11)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2-1)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+2-1)
- (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+46-22)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 72594c7f9783c..e26d05bcda021 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3262,10 +3262,14 @@ class LLVM_ABI TargetLoweringBase {
/// Return true on success. Currently only supports
/// llvm.vector.interleave{2,3,5,7}
///
- /// \p SI is the accompanying store instruction
+ /// \p Store is the accompanying store instruction. Can be either a plain
+ /// store or a vp.store intrinsic.
+ /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+ /// component being interwoven) mask. Can be nullptr, in which case the
+  /// result is unconditional.
/// \p InterleaveValues contains the interleaved values.
virtual bool
- lowerInterleaveIntrinsicToStore(StoreInst *SI,
+ lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleaveValues) const {
return false;
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 95599837e1bfc..0c0cabf40b039 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -681,23 +681,19 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID());
assert(Factor && "unexpected interleave intrinsic");
+ Value *Mask = nullptr;
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
return false;
Value *WideMask = VPStore->getOperand(2);
- Value *Mask = getMask(WideMask, Factor,
- cast<VectorType>(InterleaveValues[0]->getType()));
+ Mask = getMask(WideMask, Factor,
+ cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;
LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
<< *II << " and factor = " << Factor << "\n");
-
- // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
- // TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
- return false;
} else {
auto *SI = cast<StoreInst>(StoredBy);
if (!SI->isSimple())
@@ -705,12 +701,13 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
<< " and factor = " << Factor << "\n");
-
- // Try and match this with target specific intrinsics.
- if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
- return false;
}
+ // Try and match this with target specific intrinsics.
+ if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask,
+ InterleaveValues))
+ return false;
+
// We now have a target-specific store, so delete the old one.
DeadInsts.insert(cast<Instruction>(StoredBy));
DeadInsts.insert(II);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 235df9022c6fb..81c5263212dd6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17561,12 +17561,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
- StoreInst *SI, ArrayRef<Value *> InterleavedValues) const {
+ Instruction *Store, Value *Mask,
+ ArrayRef<Value *> InterleavedValues) const {
unsigned Factor = InterleavedValues.size();
if (Factor != 2 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
return false;
}
+ StoreInst *SI = dyn_cast<StoreInst>(Store);
+ if (!SI)
+ return false;
+ assert(!Mask && "Unexpected mask on plain store");
VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType());
const DataLayout &DL = SI->getModule()->getDataLayout();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6afb3c330d25b..b4671bb6bddf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -223,7 +223,8 @@ class AArch64TargetLowering : public TargetLowering {
ArrayRef<Value *> DeinterleaveValues) const override;
bool lowerInterleaveIntrinsicToStore(
- StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+ Instruction *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveValues) const override;
bool isLegalAddImmediate(int64_t) const override;
bool isLegalAddScalableImmediate(int64_t) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 41bbf6b9dcf2e..61ed23e70c3fd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -442,7 +442,8 @@ class RISCVTargetLowering : public TargetLowering {
ArrayRef<Value *> DeinterleaveValues) const override;
bool lowerInterleaveIntrinsicToStore(
- StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+ Instruction *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveValues) const override;
bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveRes) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index ddfacd970e950..1f92ec763700b 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -360,47 +360,71 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
- StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
+ Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const {
unsigned Factor = InterleaveValues.size();
if (Factor > 8)
return false;
- assert(SI->isSimple());
- IRBuilder<> Builder(SI);
+ IRBuilder<> Builder(Store);
auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
- auto *PtrTy = SI->getPointerOperandType();
- const DataLayout &DL = SI->getDataLayout();
+ const DataLayout &DL = Store->getDataLayout();
+ Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
- if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(), DL))
- return false;
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (auto *SI = dyn_cast<StoreInst>(Store)) {
+ assert(SI->isSimple());
+ Ptr = SI->getPointerOperand();
+ Alignment = SI->getAlign();
+ assert(!Mask && "Unexpected mask on a store");
+ Mask = Builder.getAllOnesMask(InVTy->getElementCount());
+ VL = isa<FixedVectorType>(InVTy)
+ ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ } else {
+ auto *VPStore = cast<VPIntrinsic>(Store);
+ assert(VPStore->getIntrinsicID() == Intrinsic::vp_store &&
+ "Unexpected intrinsic");
+ Ptr = VPStore->getMemoryPointerParam();
+ Alignment = VPStore->getPointerAlignment().value_or(
+ DL.getABITypeAlign(InVTy->getElementType()));
+
+ assert(Mask && "vp.store needs a mask!");
+
+ Value *WideEVL = VPStore->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, DL, Factor))
+ return false;
- Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+ VL = Builder.CreateZExt(
+ Builder.CreateUDiv(WideEVL,
+ ConstantInt::get(WideEVL->getType(), Factor)),
+ XLenTy);
+ }
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
+ return false;
if (isa<FixedVectorType>(InVTy)) {
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy});
-
+ Store->getModule(), FixedVssegIntrIds[Factor - 2],
+ {InVTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops(InterleaveValues);
- Value *VL = Builder.CreateElementCount(XLenTy, InVTy->getElementCount());
- Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount());
- Ops.append({SI->getPointerOperand(), Mask, VL});
-
+ Ops.append({Ptr, Mask, VL});
Builder.CreateCall(VssegNFunc, Ops);
return true;
}
unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
- SI->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
+ Store->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
NumElts * SEW / 8),
Factor);
- Value *VL = Constant::getAllOnesValue(XLenTy);
- Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount());
-
Value *StoredVal = PoisonValue::get(VecTupTy);
for (unsigned i = 0; i < Factor; ++i)
StoredVal = Builder.CreateIntrinsic(
@@ -408,10 +432,10 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
{StoredVal, InterleaveValues[i], Builder.getInt32(i)});
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), ScalableVssegIntrIds[Factor - 2],
+ Store->getModule(), ScalableVssegIntrIds[Factor - 2],
{VecTupTy, PtrTy, Mask->getType(), VL->getType()});
- Value *Operands[] = {StoredVal, SI->getPointerOperand(), Mask, VL,
+ Value *Operands[] = {StoredVal, Ptr, Mask, VL,
ConstantInt::get(XLenTy, Log2_64(SEW))};
Builder.CreateCall(VssegNFunc, Operands);
return true;
``````````
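A usage note on the RISC-V changes: for a vp.store feeding a Factor-way interleave, the wide EVL counts interleaved lanes, so the per-segment VL is EVL / Factor. The division is only lossless when EVL is a multiple of Factor, which is why the code conservatively bails out otherwise. Symmetrically, the wide mask is expected to decompose into Factor copies of a single per-segment mask, which the pass recovers via getMask before invoking the callback. A standalone sketch of the VL computation follows; `computeSegmentVL` is a hypothetical helper, and Builder is assumed to be positioned at the store.

```cpp
// Sketch only: derive the per-segment VL from a vp.store's wide EVL.
// Dividing is only sound when WideEVL is provably a multiple of Factor;
// otherwise trailing elements of a partial segment group would be dropped.
static Value *computeSegmentVL(IRBuilder<> &Builder, Value *WideEVL,
                               unsigned Factor, Type *XLenTy) {
  Value *SegEVL = Builder.CreateUDiv(
      WideEVL, ConstantInt::get(WideEVL->getType(), Factor));
  // CreateZExt is a no-op when the type already matches XLenTy.
  return Builder.CreateZExt(SegEVL, XLenTy);
}
```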
https://github.com/llvm/llvm-project/pull/149168
More information about the llvm-commits mailing list