[llvm] [LV][AArch64]: Utilise SVE ld4/st4 instructions via auto-vectorisation (PR #89018)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 21:23:18 PDT 2024


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->


:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 8e0a4a89f940d17b520bbca040981f54195d3ea4 4629ab0d5b252d03d090d541179350a596048460 -- llvm/include/llvm/CodeGen/TargetLowering.h llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e233d430e9..7d3e97a9f7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -56,11 +56,11 @@
 #include <cstdint>
 #include <iterator>
 #include <map>
+#include <queue>
+#include <stack>
 #include <string>
 #include <utility>
 #include <vector>
-#include <stack>
-#include <queue>
 
 namespace llvm {
 
@@ -3146,9 +3146,9 @@ public:
   ///
   /// \p DI is the deinterleave intrinsic.
   /// \p LI is the accompanying load instruction
-  virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                                std::queue<std::pair<unsigned, Value*>>& LeafNodes,
-                                                LoadInst *LI) const {
+  virtual bool lowerDeinterleaveIntrinsicToLoad(
+      IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+      LoadInst *LI) const {
     return false;
   }
 
@@ -3159,7 +3159,7 @@ public:
   /// \p II is the interleave intrinsic.
   /// \p SI is the accompanying store instruction
   virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                               std::queue<Value*>& LeafNodes,
+                                               std::queue<Value *> &LeafNodes,
                                                StoreInst *SI) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 73c3a63b61..e4e53b9b66 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -70,8 +70,8 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
-#include <utility>
 #include <queue>
+#include <utility>
 
 using namespace llvm;
 
@@ -511,28 +511,30 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
 
   LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
 
-  std::stack<IntrinsicInst*> DeinterleaveTreeQueue;
-  std::queue<std::pair<unsigned, Value*>> LeafNodes;
-  std::map<IntrinsicInst*, bool>mp;
+  std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
+  std::queue<std::pair<unsigned, Value *>> LeafNodes;
+  std::map<IntrinsicInst *, bool> mp;
   SmallVector<Instruction *> TempDeadInsts;
 
   DeinterleaveTreeQueue.push(DI);
   unsigned DILeafCount = 0;
-  while(!DeinterleaveTreeQueue.empty()) {
+  while (!DeinterleaveTreeQueue.empty()) {
     auto CurrentDI = DeinterleaveTreeQueue.top();
     DeinterleaveTreeQueue.pop();
     TempDeadInsts.push_back(CurrentDI);
     bool RootFound = false;
-    for (auto UserExtract : CurrentDI->users()) { // iterate over extract users of deinterleave
+    for (auto UserExtract :
+         CurrentDI->users()) { // iterate over extract users of deinterleave
       Instruction *Extract = dyn_cast<Instruction>(UserExtract);
       if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
         continue;
       bool IsLeaf = true;
-      for (auto UserDI : UserExtract->users()) { // iterate over deinterleave users of extract
+      for (auto UserDI :
+           UserExtract->users()) { // iterate over deinterleave users of extract
         IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
-        if (!Child_DI || 
-            Child_DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
-            continue;
+        if (!Child_DI || Child_DI->getIntrinsicID() !=
+                             Intrinsic::experimental_vector_deinterleave2)
+          continue;
         IsLeaf = false;
         if (mp.count(Child_DI) == 0) {
           DeinterleaveTreeQueue.push(Child_DI);
@@ -543,8 +545,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
         RootFound = true;
         LeafNodes.push(std::make_pair(DILeafCount, UserExtract));
         TempDeadInsts.push_back(Extract);
-      }
-      else {
+      } else {
         TempDeadInsts.push_back(Extract);
       }
     }
@@ -556,7 +557,8 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     return false;
 
   // We now have a target-specific load, so delete the old one.
-  DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(), TempDeadInsts.rend());
+  DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
+                   TempDeadInsts.rend());
   DeadInsts.push_back(LI);
   return true;
 }
@@ -572,20 +574,21 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
     return false;
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
-  std::queue<IntrinsicInst*> IeinterleaveTreeQueue;
-  std::queue<Value*> LeafNodes;
+  std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
+  std::queue<Value *> LeafNodes;
   SmallVector<Instruction *> TempDeadInsts;
 
   IeinterleaveTreeQueue.push(II);
-  while(!IeinterleaveTreeQueue.empty()) {
+  while (!IeinterleaveTreeQueue.empty()) {
     auto node = IeinterleaveTreeQueue.front();
     TempDeadInsts.push_back(node);
     IeinterleaveTreeQueue.pop();
-    for(unsigned i = 0; i < 2; i++) {
+    for (unsigned i = 0; i < 2; i++) {
       auto op = node->getOperand(i);
-      if(auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
-        if (CurrentII->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
-            continue;
+      if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
+        if (CurrentII->getIntrinsicID() !=
+            Intrinsic::experimental_vector_interleave2)
+          continue;
         IeinterleaveTreeQueue.push(CurrentII);
         continue;
       }
@@ -619,7 +622,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
       // with a factor of 2.
       if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
         Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
-      else if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+      else if (II->getIntrinsicID() ==
+               Intrinsic::experimental_vector_interleave2)
         Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ab8c01e2df..be9b72f4b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16345,15 +16345,18 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
 }
 
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
-    IntrinsicInst *DI, std::queue<std::pair<unsigned, llvm::Value*>>& LeafNodes, LoadInst *LI) const {
+    IntrinsicInst *DI,
+    std::queue<std::pair<unsigned, llvm::Value *>> &LeafNodes,
+    LoadInst *LI) const {
   // Only deinterleave2 supported at present.
   if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
     return false;
 
   const unsigned Factor = std::max(2, (int)LeafNodes.size());
 
-  VectorType *VTy = (LeafNodes.size() > 0) ? cast<VectorType>(LeafNodes.front().second->getType()) :
-                    cast<VectorType>(DI->getType()->getContainedType(0));
+  VectorType *VTy = (LeafNodes.size() > 0)
+                        ? cast<VectorType>(LeafNodes.front().second->getType())
+                        : cast<VectorType>(DI->getType()->getContainedType(0));
   const DataLayout &DL = DI->getModule()->getDataLayout();
   bool UseScalable;
   if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16417,20 +16420,20 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       }
       while (!LeafNodes.empty()) {
         unsigned ExtractIndex = LeafNodes.front().first;
-        llvm::Value* CurrentExtract = LeafNodes.front().second;
+        llvm::Value *CurrentExtract = LeafNodes.front().second;
         LeafNodes.pop();
-        ExtractValueInst* ExtractValueInst = dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
-      
+        ExtractValueInst *ExtractValueInst =
+            dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
+
         SmallVector<unsigned, 4> NewIndices;
         for (auto index : ExtractValueInst->indices())
           NewIndices.push_back(index + ExtractIndex);
 
-        Value *extrc =Builder.CreateExtractValue(Result, NewIndices);
+        Value *extrc = Builder.CreateExtractValue(Result, NewIndices);
         CurrentExtract->replaceAllUsesWith(extrc);
       }
       return true;
-    }
-    else
+    } else
       Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
   }
 
@@ -16439,7 +16442,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
 }
 
 bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
-    IntrinsicInst *II, std::queue<Value*>& LeafNodes, StoreInst *SI) const {
+    IntrinsicInst *II, std::queue<Value *> &LeafNodes, StoreInst *SI) const {
   // Only interleave2 supported at present.
   if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
     return false;
@@ -16501,8 +16504,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
       Args.push_back(Pred);
       Args.push_back(Address);
       Builder.CreateCall(StNFunc, Args);
-    }
-    else
+    } else
       Builder.CreateCall(StNFunc, {L, R, Address});
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 85497a1f7a..d114f462d6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -682,12 +682,12 @@ public:
   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
-  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                        std::queue<std::pair<unsigned, Value*>>& LeafNodes,
-                                        LoadInst *LI) const override;
+  bool lowerDeinterleaveIntrinsicToLoad(
+      IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+      LoadInst *LI) const override;
 
   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                       std::queue<Value*>& LeafNodes,
+                                       std::queue<Value *> &LeafNodes,
                                        StoreInst *SI) const override;
 
   bool isLegalAddImmediate(int64_t) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 35150928f0..51fe96b5cf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3315,10 +3315,9 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
   assert(Factor >= 2 && "Invalid interleave factor");
   auto *VecVTy = cast<VectorType>(VecTy);
 
- unsigned MaxFactor = TLI->getMaxSupportedInterleaveFactor();
- if (VecTy->isScalableTy() &&
-    (!ST->hasSVE() || Factor > MaxFactor))
-   return InstructionCost::getInvalid();
+  unsigned MaxFactor = TLI->getMaxSupportedInterleaveFactor();
+  if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor > MaxFactor))
+    return InstructionCost::getInvalid();
 
   // Vectorization for masked interleaved accesses is only enabled for scalable
   // VF.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 64e0a2bb1f..f98fbc581c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21024,9 +21024,9 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
   return true;
 }
 
-bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                                           std::queue<std::pair<unsigned, Value*>>& LeafNodes,
-                                                           LoadInst *LI) const {
+bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
+    IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+    LoadInst *LI) const {
   assert(LI->isSimple());
   IRBuilder<> Builder(LI);
 
@@ -21037,8 +21037,10 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
   unsigned Factor = std::max(2, (int)LeafNodes.size());
 
   VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
-  VectorType *ResVTy = (LeafNodes.size() > 0) ? cast<VectorType>(LeafNodes.front().second->getType()) :
-                        cast<VectorType>(DI->getType()->getContainedType(0));
+  VectorType *ResVTy =
+      (LeafNodes.size() > 0)
+          ? cast<VectorType>(LeafNodes.front().second->getType())
+          : cast<VectorType>(DI->getType()->getContainedType(0));
 
   if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                     LI->getPointerAddressSpace(),
@@ -21078,7 +21080,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
       ExtractIndex = LeafNodes.front().first;
       auto CurrentExtract = LeafNodes.front().second;
       LeafNodes.pop();
-      ExtractValueInst* ExtractValueInst = dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
+      ExtractValueInst *ExtractValueInst =
+          dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
       SmallVector<unsigned, 4> NewIndices;
       for (auto index : ExtractValueInst->indices()) {
         NewIndices.push_back(index + ExtractIndex);
@@ -21097,9 +21100,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
   return true;
 }
 
-bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                                          std::queue<Value*>& LeafNodes,
-                                                          StoreInst *SI) const {
+bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
+    IntrinsicInst *II, std::queue<Value *> &LeafNodes, StoreInst *SI) const {
   assert(SI->isSimple());
   IRBuilder<> Builder(SI);
 
@@ -21137,14 +21139,14 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                            {InVTy, XLenTy});
     VL = Constant::getAllOnesValue(XLenTy);
     SmallVector<Value *> Args;
-      while (!LeafNodes.empty()) {
-        Args.push_back(LeafNodes.front());
-        LeafNodes.pop();
-      }
-      Args.push_back(SI->getPointerOperand());
-      Args.push_back(VL);
-      Builder.CreateCall(VssegNFunc, Args);
-      return true;
+    while (!LeafNodes.empty()) {
+      Args.push_back(LeafNodes.front());
+      LeafNodes.pop();
+    }
+    Args.push_back(SI->getPointerOperand());
+    Args.push_back(VL);
+    Builder.CreateCall(VssegNFunc, Args);
+    return true;
   }
 
   Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 1f104cf3bc..3c16dcd9ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -855,12 +855,12 @@ public:
   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
-  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                        std::queue<std::pair<unsigned, Value*>>& LeafNodes,
-                                        LoadInst *LI) const override;
+  bool lowerDeinterleaveIntrinsicToLoad(
+      IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+      LoadInst *LI) const override;
 
   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                       std::queue<Value*>& LeafNodes,
+                                       std::queue<Value *> &LeafNodes,
                                        StoreInst *SI) const override;
 
   bool supportKCFIBundles() const override { return true; }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 41f8c5a72c..7a2c7e3f8f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -151,10 +151,10 @@
 #include <limits>
 #include <map>
 #include <memory>
+#include <queue>
 #include <string>
 #include <tuple>
 #include <utility>
-#include <queue>
 
 using namespace llvm;
 
@@ -461,22 +461,22 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
   // must use intrinsics to interleave.
   if (VecTy->isScalableTy()) {
     SmallVector<Value *> Vecs(Vals);
-    unsigned AllNodesNum = (2*Vals.size()) - 1;
+    unsigned AllNodesNum = (2 * Vals.size()) - 1;
     // last element in the vec should be the final interleaved result,
     // so, skip processing last element.
-    AllNodesNum --;
+    AllNodesNum--;
     // interleave each 2 consecutive nodes, and push result to the vec,
     // so that we can interleave the interleaved results again if we have
     // more than 2 vectors to interleave.
-    for (unsigned i = 0; i < AllNodesNum; i +=2) {
+    for (unsigned i = 0; i < AllNodesNum; i += 2) {
       VectorType *VecTy = cast<VectorType>(Vecs[i]->getType());
       VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
       auto InterleavedVec = Builder.CreateIntrinsic(
-        WideVecTy, Intrinsic::experimental_vector_interleave2,
-        {Vecs[i], Vecs[i+1]}, /*FMFSource=*/nullptr, Name);
+          WideVecTy, Intrinsic::experimental_vector_interleave2,
+          {Vecs[i], Vecs[i + 1]}, /*FMFSource=*/nullptr, Name);
       Vecs.push_back(InterleavedVec);
     }
-    return Vecs[Vecs.size()-1];
+    return Vecs[Vecs.size() - 1];
   }
 
   // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2533,7 +2533,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
                              unsigned Part, Value *MaskForGaps) -> Value * {
     if (VF.isScalable()) {
       assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
-      assert(isPowerOf2_32(InterleaveFactor)  &&
+      assert(isPowerOf2_32(InterleaveFactor) &&
              "Unsupported deinterleave factor for scalable vectors");
       auto *BlockInMaskPart = State.get(BlockInMask, Part);
       SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
@@ -2586,25 +2586,27 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
     }
 
     if (VecTy->isScalableTy()) {
-      assert(isPowerOf2_32(InterleaveFactor)  &&
-            "Unsupported deinterleave factor for scalable vectors");
+      assert(isPowerOf2_32(InterleaveFactor) &&
+             "Unsupported deinterleave factor for scalable vectors");
       for (unsigned Part = 0; Part < UF; ++Part) {
         // Scalable vectors cannot use arbitrary shufflevectors (only splats),
         // so must use intrinsics to deinterleave.
-        
-        std::queue<Value *>Queue;
+
+        std::queue<Value *> Queue;
         Queue.push(NewLoads[Part]);
         // NonLeaf represents how many times we will do deinterleaving,
-        // think of it as a tree, each node will be deinterleaved, untill we reach to
-        // the leaf nodes which will be the final results of deinterleaving.
+        // think of it as a tree, each node will be deinterleaved, untill we
+        // reach to the leaf nodes which will be the final results of
+        // deinterleaving.
         unsigned NonLeaf = InterleaveFactor - 1;
-        for (unsigned i = 0; i < NonLeaf; i ++) {
+        for (unsigned i = 0; i < NonLeaf; i++) {
           auto Node = Queue.front();
           Queue.pop();
           auto DeinterleaveType = Node->getType();
           Value *DI = Builder.CreateIntrinsic(
-            Intrinsic::experimental_vector_deinterleave2, DeinterleaveType, Node,
-            /*FMFSource=*/nullptr, "root.strided.vec");
+              Intrinsic::experimental_vector_deinterleave2, DeinterleaveType,
+              Node,
+              /*FMFSource=*/nullptr, "root.strided.vec");
           Value *StridedVec1 = Builder.CreateExtractValue(DI, 0);
           Value *StridedVec2 = Builder.CreateExtractValue(DI, 1);
           Queue.push(StridedVec1);
@@ -2712,7 +2714,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
 
     // Interleave all the smaller vectors into one wider vector.
     Value *IVec = interleaveVectors(Builder, StoredVecs, "interleaved.vec");
-    //LLVM_DEBUG(dbgs() << "interleaved vec: "; IVec->dump());
+    // LLVM_DEBUG(dbgs() << "interleaved vec: "; IVec->dump());
     Instruction *NewStoreInstr;
     if (BlockInMask || MaskForGaps) {
       Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
@@ -8691,8 +8693,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
                      CM.getWideningDecision(IG->getInsertPos(), VF) ==
                          LoopVectorizationCostModel::CM_Interleave);
       // For scalable vectors, the only interleave factor currently supported
-      // is a (power of 2) factor, since we require the (de)interleave2 intrinsics instead of
-      // shufflevectors, so we can do (de)interleave2 recursively.
+      // is a (power of 2) factor, since we require the (de)interleave2
+      // intrinsics instead of shufflevectors, so we can do (de)interleave2
+      // recursively.
       assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
              "Unsupported interleave factor for scalable vectors");
       return Result;

``````````

</details>


https://github.com/llvm/llvm-project/pull/89018


More information about the llvm-commits mailing list