[llvm] [IA] Generalize the support for power-of-two (de)interleave intrinsics (PR #123863)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 21 16:55:44 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

<details>
<summary>Changes</summary>

Previously, AArch64 used pattern matching to support llvm.vector.(de)interleave of 2 and 4; RISC-V only supported (de)interleave of 2.

This patch consolidates the logics in these two targets by factoring out the common factor calculations into the InterleaveAccess Pass.

-----
Split out from #<!-- -->120490 

---

Patch is 107.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123863.diff


11 Files Affected:

- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+9-11) 
- (modified) llvm/lib/CodeGen/InterleavedAccessPass.cpp (+174-8) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+9-197) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2-2) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+29-21) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+154-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll (+30-11) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+172-51) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+31-14) 
- (modified) llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll (+204) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 38ac90f0c081b3..10680f43e9ddd7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3157,12 +3157,11 @@ class TargetLoweringBase {
   /// llvm.vector.deinterleave2
   ///
   /// \p DI is the deinterleave intrinsic.
-  /// \p LI is the accompanying load instruction
-  /// \p DeadInsts is a reference to a vector that keeps track of dead
-  /// instruction during transformations.
-  virtual bool lowerDeinterleaveIntrinsicToLoad(
-      IntrinsicInst *DI, LoadInst *LI,
-      SmallVectorImpl<Instruction *> &DeadInsts) const {
+  /// \p LI is the accompanying load instruction.
+  /// \p DeinterleaveValues contains the deinterleaved values.
+  virtual bool
+  lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, LoadInst *LI,
+                                   ArrayRef<Value *> DeinterleaveValues) const {
     return false;
   }
 
@@ -3172,11 +3171,10 @@ class TargetLoweringBase {
   ///
   /// \p II is the interleave intrinsic.
   /// \p SI is the accompanying store instruction
-  /// \p DeadInsts is a reference to a vector that keeps track of dead
-  /// instruction during transformations.
-  virtual bool lowerInterleaveIntrinsicToStore(
-      IntrinsicInst *II, StoreInst *SI,
-      SmallVectorImpl<Instruction *> &DeadInsts) const {
+  /// \p InterleaveValues contains the interleaved values.
+  virtual bool
+  lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI,
+                                  ArrayRef<Value *> InterleaveValues) const {
     return false;
   }
 
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c6d5533fd2bae2..97414d35e4b100 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -60,6 +60,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
@@ -478,6 +479,162 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
   return true;
 }
 
+// For an (de)interleave tree like this:
+//
+//   A   C B   D
+//   |___| |___|
+//     |_____|
+//        |
+//     A B C D
+//
+//  We will get ABCD at the end while the leaf operands/results
+//  are ACBD, which are also what we initially collected in
+//  getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
+//  hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
+//  to reorder them by interleaving these values.
+static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
+  int NumLeaves = SubLeaves.size();
+  if (NumLeaves == 2)
+    return;
+
+  assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
+
+  const int HalfLeaves = NumLeaves / 2;
+  // Visit the sub-trees.
+  interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
+  interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
+
+  SmallVector<Value *, 8> Buffer;
+  // The step is alternating between +half and -half+1. We exit the
+  // loop right before the last element because given the fact that
+  // SubLeaves always has an even number of elements, the last element
+  // will never be moved and the last to be visited. This simplifies
+  // the exit condition.
+  for (int i = 0; i < NumLeaves - 1;
+       (i < HalfLeaves) ? i += HalfLeaves : i += (1 - HalfLeaves))
+    Buffer.push_back(SubLeaves[i]);
+
+  llvm::copy(Buffer, SubLeaves.begin());
+}
+
+static bool
+getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
+                          SmallVectorImpl<Instruction *> &DeadInsts) {
+  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
+    return false;
+
+  // Visit with BFS
+  SmallVector<IntrinsicInst *, 8> Queue;
+  Queue.push_back(II);
+  while (!Queue.empty()) {
+    IntrinsicInst *Current = Queue.front();
+    Queue.erase(Queue.begin());
+
+    // All the intermediate intrinsics will be deleted.
+    DeadInsts.push_back(Current);
+
+    for (unsigned I = 0; I < 2; ++I) {
+      Value *Op = Current->getOperand(I);
+      if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
+        if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
+          Queue.push_back(OpII);
+          continue;
+        }
+
+      // If this is not a perfectly balanced tree, the leaf
+      // result types would be different.
+      if (!Operands.empty() && Op->getType() != Operands.back()->getType())
+        return false;
+
+      Operands.push_back(Op);
+    }
+  }
+
+  const unsigned Factor = Operands.size();
+  // Currently we only recognize power-of-two factors.
+  // FIXME: should we assert here instead?
+  if (Factor <= 1 || !isPowerOf2_32(Factor))
+    return false;
+
+  interleaveLeafValues(Operands);
+  return true;
+}
+
+static bool
+getVectorDeinterleaveFactor(IntrinsicInst *II,
+                            SmallVectorImpl<Value *> &Results,
+                            SmallVectorImpl<Instruction *> &DeadInsts) {
+  using namespace PatternMatch;
+  if (II->getIntrinsicID() != Intrinsic::vector_deinterleave2 ||
+      !II->hasNUses(2))
+    return false;
+
+  // Visit with BFS
+  SmallVector<IntrinsicInst *, 8> Queue;
+  Queue.push_back(II);
+  while (!Queue.empty()) {
+    IntrinsicInst *Current = Queue.front();
+    Queue.erase(Queue.begin());
+    assert(Current->hasNUses(2));
+
+    // All the intermediate intrinsics will be deleted from the bottom-up.
+    DeadInsts.insert(DeadInsts.begin(), Current);
+
+    ExtractValueInst *LHS = nullptr, *RHS = nullptr;
+    for (User *Usr : Current->users()) {
+      if (!isa<ExtractValueInst>(Usr))
+        return 0;
+
+      auto *EV = cast<ExtractValueInst>(Usr);
+      // Intermediate ExtractValue instructions will also be deleted.
+      DeadInsts.insert(DeadInsts.begin(), EV);
+      ArrayRef<unsigned> Indices = EV->getIndices();
+      if (Indices.size() != 1)
+        return false;
+
+      if (Indices[0] == 0 && !LHS)
+        LHS = EV;
+      else if (Indices[0] == 1 && !RHS)
+        RHS = EV;
+      else
+        return false;
+    }
+
+    // We have legal indices. At this point we're either going
+    // to continue the traversal or push the leaf values into Results.
+    for (ExtractValueInst *EV : {LHS, RHS}) {
+      // Continue the traversal. We're playing safe here and matching only the
+      // expression consisting of a perfectly balanced binary tree in which all
+      // intermediate values are only used once.
+      if (EV->hasOneUse() &&
+          match(EV->user_back(),
+                m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
+          EV->user_back()->hasNUses(2)) {
+        auto *EVUsr = cast<IntrinsicInst>(EV->user_back());
+        Queue.push_back(EVUsr);
+        continue;
+      }
+
+      // If this is not a perfectly balanced tree, the leaf
+      // result types would be different.
+      if (!Results.empty() && EV->getType() != Results.back()->getType())
+        return false;
+
+      // Save the leaf value.
+      Results.push_back(EV);
+    }
+  }
+
+  const unsigned Factor = Results.size();
+  // Currently we only recognize power-of-two factors.
+  // FIXME: should we assert here instead?
+  if (Factor <= 1 || !isPowerOf2_32(Factor))
+    return 0;
+
+  interleaveLeafValues(Results);
+  return true;
+}
+
 bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
   LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
@@ -485,16 +642,21 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
   if (!LI || !LI->hasOneUse() || !LI->isSimple())
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
+  SmallVector<Value *, 8> DeinterleaveValues;
+  SmallVector<Instruction *, 8> DeinterleaveDeadInsts;
+  if (!getVectorDeinterleaveFactor(DI, DeinterleaveValues,
+                                   DeinterleaveDeadInsts))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
+                    << " with factor = " << DeinterleaveValues.size() << "\n");
 
   // Try and match this with target specific intrinsics.
-  SmallVector<Instruction *, 4> DeinterleaveDeadInsts;
-  if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveDeadInsts))
+  if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveValues))
     return false;
 
   DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
   // We now have a target-specific load, so delete the old one.
-  DeadInsts.insert(DI);
   DeadInsts.insert(LI);
   return true;
 }
@@ -509,16 +671,20 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
   if (!SI || !SI->isSimple())
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
+  SmallVector<Value *, 8> InterleaveValues;
+  SmallVector<Instruction *, 8> InterleaveDeadInsts;
+  if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
+                    << " with factor = " << InterleaveValues.size() << "\n");
 
-  SmallVector<Instruction *, 4> InterleaveDeadInsts;
   // Try and match this with target specific intrinsics.
-  if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveDeadInsts))
+  if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveValues))
     return false;
 
   // We now have a target-specific store, so delete the old one.
   DeadInsts.insert(SI);
-  DeadInsts.insert(II);
   DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
   return true;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9a0bb73087980d..13c9dd4187ab73 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17464,142 +17464,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
   return true;
 }
 
-bool getDeinterleave2Values(
-    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
-    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
-  if (!DI->hasNUses(2))
-    return false;
-  auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
-  auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
-  if (!Extr1 || !Extr2)
-    return false;
-
-  DeinterleavedValues.resize(2);
-  // Place the values into the vector in the order of extraction:
-  DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
-  DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
-  if (!DeinterleavedValues[0] || !DeinterleavedValues[1])
-    return false;
-
-  // Make sure that the extracted values match the deinterleave tree pattern
-  if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
-      !match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
-    LLVM_DEBUG(dbgs() << "matching deinterleave2 failed\n");
-    return false;
-  }
-  // DeinterleavedValues will be replace by output of ld2
-  DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
-                               DeinterleavedValues.begin(),
-                               DeinterleavedValues.end());
-  return true;
-}
-
-/*
-DeinterleaveIntrinsic tree:
-                   [DI]
-                /        \
-         [Extr<0>]      [Extr<1>]
-            |                 |
-           [DI]              [DI]
-          /    \            /    \
-    [Extr<0>][Extr<1>] [Extr<0>][Extr<1>]
-        |       |         |         |
-roots:  A       C         B         D
-roots in correct order of DI4 will be: A B C D.
-Returns true if `DI` is the top of an IR tree that represents a theoretical
-vector.deinterleave4 intrinsic. When true is returned, \p `DeinterleavedValues`
-vector is populated with the results such an intrinsic would return: (i.e. {A,
-B, C, D } = vector.deinterleave4(...))
-*/
-bool getDeinterleave4Values(
-    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
-    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
-  if (!DI->hasNUses(2))
-    return false;
-  auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
-  auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
-  if (!Extr1 || !Extr2)
-    return false;
-
-  if (!Extr1->hasOneUse() || !Extr2->hasOneUse())
-    return false;
-  auto *DI1 = *(Extr1->user_begin());
-  auto *DI2 = *(Extr2->user_begin());
-
-  if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
-    return false;
-  // Leaf nodes of the deinterleave tree:
-  auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
-  auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
-  auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
-  auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
-  // Make sure that the A,B,C and D are ExtractValue instructions before getting
-  // the extract index
-  if (!A || !B || !C || !D)
-    return false;
-
-  DeinterleavedValues.resize(4);
-  // Place the values into the vector in the order of deinterleave4:
-  DeinterleavedValues[0x3 &
-                      ((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
-  DeinterleavedValues[0x3 &
-                      ((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
-  DeinterleavedValues[0x3 &
-                      ((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
-  DeinterleavedValues[0x3 &
-                      ((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
-  if (!DeinterleavedValues[0] || !DeinterleavedValues[1] ||
-      !DeinterleavedValues[2] || !DeinterleavedValues[3])
-    return false;
-
-  // Make sure that A,B,C,D match the deinterleave tree pattern
-  if (!match(DeinterleavedValues[0], m_ExtractValue<0>(m_Deinterleave2(
-                                         m_ExtractValue<0>(m_Specific(DI))))) ||
-      !match(DeinterleavedValues[1], m_ExtractValue<0>(m_Deinterleave2(
-                                         m_ExtractValue<1>(m_Specific(DI))))) ||
-      !match(DeinterleavedValues[2], m_ExtractValue<1>(m_Deinterleave2(
-                                         m_ExtractValue<0>(m_Specific(DI))))) ||
-      !match(DeinterleavedValues[3], m_ExtractValue<1>(m_Deinterleave2(
-                                         m_ExtractValue<1>(m_Specific(DI)))))) {
-    LLVM_DEBUG(dbgs() << "matching deinterleave4 failed\n");
-    return false;
-  }
-
-  // These Values will not be used anymore,
-  // DI4 will be created instead of nested DI1 and DI2
-  DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
-                               DeinterleavedValues.begin(),
-                               DeinterleavedValues.end());
-  DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1));
-  DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
-  DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
-  DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
-
-  return true;
-}
-
-bool getDeinterleavedValues(
-    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
-    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
-  if (getDeinterleave4Values(DI, DeinterleavedValues, DeInterleaveDeadInsts))
-    return true;
-  return getDeinterleave2Values(DI, DeinterleavedValues, DeInterleaveDeadInsts);
-}
-
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
     IntrinsicInst *DI, LoadInst *LI,
-    SmallVectorImpl<Instruction *> &DeadInsts) const {
-  // Only deinterleave2 supported at present.
-  if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
-    return false;
-
-  SmallVector<Instruction *, 4> DeinterleavedValues;
-  SmallVector<Instruction *, 8> DeInterleaveDeadInsts;
-
-  if (!getDeinterleavedValues(DI, DeinterleavedValues, DeInterleaveDeadInsts)) {
-    LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
-    return false;
-  }
+    ArrayRef<Value *> DeinterleavedValues) const {
   unsigned Factor = DeinterleavedValues.size();
   assert((Factor == 2 || Factor == 4) &&
          "Currently supported Factor is 2 or 4 only");
@@ -17666,67 +17533,12 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
     }
   }
-  DeadInsts.insert(DeadInsts.end(), DeInterleaveDeadInsts.begin(),
-                   DeInterleaveDeadInsts.end());
   return true;
 }
 
-/*
-InterleaveIntrinsic tree.
-          A    C         B    D
-           \  /           \  /
-           [II]           [II]
-                 \     /
-                  [II]
-
-values in correct order of interleave4: A B C D.
-Returns true if `II` is the root of an IR tree that represents a theoretical
-vector.interleave4 intrinsic. When true is returned, \p `InterleavedValues`
-vector is populated with the inputs such an intrinsic would take: (i.e.
-vector.interleave4(A, B, C, D)).
-*/
-bool getValuesToInterleave(
-    Value *II, SmallVectorImpl<Value *> &InterleavedValues,
-    SmallVectorImpl<Instruction *> &InterleaveDeadInsts) {
-  Value *A, *B, *C, *D;
-  // Try to match interleave of Factor 4
-  if (match(II, m_Interleave2(m_Interleave2(m_Value(A), m_Value(C)),
-                              m_Interleave2(m_Value(B), m_Value(D))))) {
-    InterleavedValues.push_back(A);
-    InterleavedValues.push_back(B);
-    InterleavedValues.push_back(C);
-    InterleavedValues.push_back(D);
-    // intermediate II will not be needed anymore
-    InterleaveDeadInsts.push_back(
-        cast<Instruction>(cast<Instruction>(II)->getOperand(0)));
-    InterleaveDeadInsts.push_back(
-        cast<Instruction>(cast<Instruction>(II)->getOperand(1)));
-    return true;
-  }
-
-  // Try to match interleave of Factor 2
-  if (match(II, m_Interleave2(m_Value(A), m_Value(B)))) {
-    InterleavedValues.push_back(A);
-    InterleavedValues.push_back(B);
-    return true;
-  }
-
-  return false;
-}
-
 bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
     IntrinsicInst *II, StoreInst *SI,
-    SmallVectorImpl<Instruction *> &DeadInsts) const {
-  // Only interleave2 supported at present.
-  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
-    return false;
-
-  SmallVector<Value *, 4> InterleavedValues;
-  SmallVector<Instruction *, 2> InterleaveDeadInsts;
-  if (!getValuesToInterleave(II, InterleavedValues, InterleaveDeadInsts)) {
-    LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
-    return false;
-  }
+    ArrayRef<Value *> InterleavedValues) const {
   unsigned Factor = InterleavedValues.size();
   assert((Factor == 2 || Factor == 4) &&
          "Currently supported Factor is 2 or 4 only");
@@ -17762,9 +17574,11 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
         Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
 
   auto ExtractedValues = InterleavedValues;
+  SmallVector<Value *, 4> StoreOperands(InterleavedValues.begin(),
+                                        InterleavedValues.end());
   if (UseScalable)
-    InterleavedValues.push_back(Pred);
-  InterleavedValues.push_back(BaseAddr);
+    StoreOperands.push_back(Pred);
+  StoreOperands.push_back(BaseAddr);
   for (unsigned I = 0; I < NumStores; ++I) {
     Value *Address = BaseAddr;
     if (NumStores > 1) {
@@ -17773,16 +17587,14 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
       Value *Idx =
           Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
       for (unsigned J = 0; J < Factor; J++) {
-        InterleavedValues[J] =
+        StoreOperands[J] =
             Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
       }
       // update the address
-      InterleavedValues[InterleavedValues.size() - 1] = Address;
+      StoreOperands[StoreOperands.size() - 1] = Address;
     }
-    Builder.CreateCall(StNFunc, InterleavedValues);
+    Builder.CreateCall(StNFunc, StoreOperands);
   }
-  DeadInsts.insert(DeadInsts.end(), InterleaveDeadInsts.begin(),
-                   InterleaveDeadInsts.end());
   return true;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 61579de50db17e..3fa3d62cf2e532 10064...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/123863


More information about the llvm-commits mailing list