[llvm] [LV][AArch64]: Utilise SVE ld4/st4 instructions via auto-vectorisation (PR #89018)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 16 21:23:18 PDT 2024
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 8e0a4a89f940d17b520bbca040981f54195d3ea4 4629ab0d5b252d03d090d541179350a596048460 -- llvm/include/llvm/CodeGen/TargetLowering.h llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
``````````
</details>
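If it helps, here is one way to apply the suggested formatting locally. This is only a sketch: the patch file name is arbitrary, and the second option assumes the PR consists of a single commit.

``````````bash
# Option 1: save the clang-format diff shown below to a file and apply it
# to the working tree with git (the file name is just an example).
git apply clang-format.patch

# Option 2: if the PR is a single commit, re-run git-clang-format without
# --diff to rewrite the changed lines of the affected files in place.
git-clang-format HEAD~1
``````````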
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e233d430e9..7d3e97a9f7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -56,11 +56,11 @@
#include <cstdint>
#include <iterator>
#include <map>
+#include <queue>
+#include <stack>
#include <string>
#include <utility>
#include <vector>
-#include <stack>
-#include <queue>
namespace llvm {
@@ -3146,9 +3146,9 @@ public:
///
/// \p DI is the deinterleave intrinsic.
/// \p LI is the accompanying load instruction
- virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
- std::queue<std::pair<unsigned, Value*>>& LeafNodes,
- LoadInst *LI) const {
+ virtual bool lowerDeinterleaveIntrinsicToLoad(
+ IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+ LoadInst *LI) const {
return false;
}
@@ -3159,7 +3159,7 @@ public:
/// \p II is the interleave intrinsic.
/// \p SI is the accompanying store instruction
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
- std::queue<Value*>& LeafNodes,
+ std::queue<Value *> &LeafNodes,
StoreInst *SI) const {
return false;
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 73c3a63b61..e4e53b9b66 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -70,8 +70,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
-#include <utility>
#include <queue>
+#include <utility>
using namespace llvm;
@@ -511,28 +511,30 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
- std::stack<IntrinsicInst*> DeinterleaveTreeQueue;
- std::queue<std::pair<unsigned, Value*>> LeafNodes;
- std::map<IntrinsicInst*, bool>mp;
+ std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
+ std::queue<std::pair<unsigned, Value *>> LeafNodes;
+ std::map<IntrinsicInst *, bool> mp;
SmallVector<Instruction *> TempDeadInsts;
DeinterleaveTreeQueue.push(DI);
unsigned DILeafCount = 0;
- while(!DeinterleaveTreeQueue.empty()) {
+ while (!DeinterleaveTreeQueue.empty()) {
auto CurrentDI = DeinterleaveTreeQueue.top();
DeinterleaveTreeQueue.pop();
TempDeadInsts.push_back(CurrentDI);
bool RootFound = false;
- for (auto UserExtract : CurrentDI->users()) { // iterate over extract users of deinterleave
+ for (auto UserExtract :
+ CurrentDI->users()) { // iterate over extract users of deinterleave
Instruction *Extract = dyn_cast<Instruction>(UserExtract);
if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
continue;
bool IsLeaf = true;
- for (auto UserDI : UserExtract->users()) { // iterate over deinterleave users of extract
+ for (auto UserDI :
+ UserExtract->users()) { // iterate over deinterleave users of extract
IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
- if (!Child_DI ||
- Child_DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
- continue;
+ if (!Child_DI || Child_DI->getIntrinsicID() !=
+ Intrinsic::experimental_vector_deinterleave2)
+ continue;
IsLeaf = false;
if (mp.count(Child_DI) == 0) {
DeinterleaveTreeQueue.push(Child_DI);
@@ -543,8 +545,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
RootFound = true;
LeafNodes.push(std::make_pair(DILeafCount, UserExtract));
TempDeadInsts.push_back(Extract);
- }
- else {
+ } else {
TempDeadInsts.push_back(Extract);
}
}
@@ -556,7 +557,8 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
return false;
// We now have a target-specific load, so delete the old one.
- DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(), TempDeadInsts.rend());
+ DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
+ TempDeadInsts.rend());
DeadInsts.push_back(LI);
return true;
}
@@ -572,20 +574,21 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
- std::queue<IntrinsicInst*> IeinterleaveTreeQueue;
- std::queue<Value*> LeafNodes;
+ std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
+ std::queue<Value *> LeafNodes;
SmallVector<Instruction *> TempDeadInsts;
IeinterleaveTreeQueue.push(II);
- while(!IeinterleaveTreeQueue.empty()) {
+ while (!IeinterleaveTreeQueue.empty()) {
auto node = IeinterleaveTreeQueue.front();
TempDeadInsts.push_back(node);
IeinterleaveTreeQueue.pop();
- for(unsigned i = 0; i < 2; i++) {
+ for (unsigned i = 0; i < 2; i++) {
auto op = node->getOperand(i);
- if(auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
- if (CurrentII->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
- continue;
+ if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
+ if (CurrentII->getIntrinsicID() !=
+ Intrinsic::experimental_vector_interleave2)
+ continue;
IeinterleaveTreeQueue.push(CurrentII);
continue;
}
@@ -619,7 +622,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
// with a factor of 2.
if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
- else if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ else if (II->getIntrinsicID() ==
+ Intrinsic::experimental_vector_interleave2)
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ab8c01e2df..be9b72f4b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16345,15 +16345,18 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
}
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
- IntrinsicInst *DI, std::queue<std::pair<unsigned, llvm::Value*>>& LeafNodes, LoadInst *LI) const {
+ IntrinsicInst *DI,
+ std::queue<std::pair<unsigned, llvm::Value *>> &LeafNodes,
+ LoadInst *LI) const {
// Only deinterleave2 supported at present.
if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
return false;
const unsigned Factor = std::max(2, (int)LeafNodes.size());
- VectorType *VTy = (LeafNodes.size() > 0) ? cast<VectorType>(LeafNodes.front().second->getType()) :
- cast<VectorType>(DI->getType()->getContainedType(0));
+ VectorType *VTy = (LeafNodes.size() > 0)
+ ? cast<VectorType>(LeafNodes.front().second->getType())
+ : cast<VectorType>(DI->getType()->getContainedType(0));
const DataLayout &DL = DI->getModule()->getDataLayout();
bool UseScalable;
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16417,20 +16420,20 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
while (!LeafNodes.empty()) {
unsigned ExtractIndex = LeafNodes.front().first;
- llvm::Value* CurrentExtract = LeafNodes.front().second;
+ llvm::Value *CurrentExtract = LeafNodes.front().second;
LeafNodes.pop();
- ExtractValueInst* ExtractValueInst = dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
-
+ ExtractValueInst *ExtractValueInst =
+ dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
+
SmallVector<unsigned, 4> NewIndices;
for (auto index : ExtractValueInst->indices())
NewIndices.push_back(index + ExtractIndex);
- Value *extrc =Builder.CreateExtractValue(Result, NewIndices);
+ Value *extrc = Builder.CreateExtractValue(Result, NewIndices);
CurrentExtract->replaceAllUsesWith(extrc);
}
return true;
- }
- else
+ } else
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
}
@@ -16439,7 +16442,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
- IntrinsicInst *II, std::queue<Value*>& LeafNodes, StoreInst *SI) const {
+ IntrinsicInst *II, std::queue<Value *> &LeafNodes, StoreInst *SI) const {
// Only interleave2 supported at present.
if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
return false;
@@ -16501,8 +16504,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
Args.push_back(Pred);
Args.push_back(Address);
Builder.CreateCall(StNFunc, Args);
- }
- else
+ } else
Builder.CreateCall(StNFunc, {L, R, Address});
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 85497a1f7a..d114f462d6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -682,12 +682,12 @@ public:
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
- std::queue<std::pair<unsigned, Value*>>& LeafNodes,
- LoadInst *LI) const override;
+ bool lowerDeinterleaveIntrinsicToLoad(
+ IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+ LoadInst *LI) const override;
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
- std::queue<Value*>& LeafNodes,
+ std::queue<Value *> &LeafNodes,
StoreInst *SI) const override;
bool isLegalAddImmediate(int64_t) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 35150928f0..51fe96b5cf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3315,10 +3315,9 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
assert(Factor >= 2 && "Invalid interleave factor");
auto *VecVTy = cast<VectorType>(VecTy);
- unsigned MaxFactor = TLI->getMaxSupportedInterleaveFactor();
- if (VecTy->isScalableTy() &&
- (!ST->hasSVE() || Factor > MaxFactor))
- return InstructionCost::getInvalid();
+ unsigned MaxFactor = TLI->getMaxSupportedInterleaveFactor();
+ if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor > MaxFactor))
+ return InstructionCost::getInvalid();
// Vectorization for masked interleaved accesses is only enabled for scalable
// VF.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 64e0a2bb1f..f98fbc581c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21024,9 +21024,9 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
-bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
- std::queue<std::pair<unsigned, Value*>>& LeafNodes,
- LoadInst *LI) const {
+bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
+ IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+ LoadInst *LI) const {
assert(LI->isSimple());
IRBuilder<> Builder(LI);
@@ -21037,8 +21037,10 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
unsigned Factor = std::max(2, (int)LeafNodes.size());
VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
- VectorType *ResVTy = (LeafNodes.size() > 0) ? cast<VectorType>(LeafNodes.front().second->getType()) :
- cast<VectorType>(DI->getType()->getContainedType(0));
+ VectorType *ResVTy =
+ (LeafNodes.size() > 0)
+ ? cast<VectorType>(LeafNodes.front().second->getType())
+ : cast<VectorType>(DI->getType()->getContainedType(0));
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
LI->getPointerAddressSpace(),
@@ -21078,7 +21080,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
ExtractIndex = LeafNodes.front().first;
auto CurrentExtract = LeafNodes.front().second;
LeafNodes.pop();
- ExtractValueInst* ExtractValueInst = dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
+ ExtractValueInst *ExtractValueInst =
+ dyn_cast<llvm::ExtractValueInst>(CurrentExtract);
SmallVector<unsigned, 4> NewIndices;
for (auto index : ExtractValueInst->indices()) {
NewIndices.push_back(index + ExtractIndex);
@@ -21097,9 +21100,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
return true;
}
-bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
- std::queue<Value*>& LeafNodes,
- StoreInst *SI) const {
+bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
+ IntrinsicInst *II, std::queue<Value *> &LeafNodes, StoreInst *SI) const {
assert(SI->isSimple());
IRBuilder<> Builder(SI);
@@ -21137,14 +21139,14 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
{InVTy, XLenTy});
VL = Constant::getAllOnesValue(XLenTy);
SmallVector<Value *> Args;
- while (!LeafNodes.empty()) {
- Args.push_back(LeafNodes.front());
- LeafNodes.pop();
- }
- Args.push_back(SI->getPointerOperand());
- Args.push_back(VL);
- Builder.CreateCall(VssegNFunc, Args);
- return true;
+ while (!LeafNodes.empty()) {
+ Args.push_back(LeafNodes.front());
+ LeafNodes.pop();
+ }
+ Args.push_back(SI->getPointerOperand());
+ Args.push_back(VL);
+ Builder.CreateCall(VssegNFunc, Args);
+ return true;
}
Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 1f104cf3bc..3c16dcd9ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -855,12 +855,12 @@ public:
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
- std::queue<std::pair<unsigned, Value*>>& LeafNodes,
- LoadInst *LI) const override;
+ bool lowerDeinterleaveIntrinsicToLoad(
+ IntrinsicInst *DI, std::queue<std::pair<unsigned, Value *>> &LeafNodes,
+ LoadInst *LI) const override;
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
- std::queue<Value*>& LeafNodes,
+ std::queue<Value *> &LeafNodes,
StoreInst *SI) const override;
bool supportKCFIBundles() const override { return true; }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 41f8c5a72c..7a2c7e3f8f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -151,10 +151,10 @@
#include <limits>
#include <map>
#include <memory>
+#include <queue>
#include <string>
#include <tuple>
#include <utility>
-#include <queue>
using namespace llvm;
@@ -461,22 +461,22 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// must use intrinsics to interleave.
if (VecTy->isScalableTy()) {
SmallVector<Value *> Vecs(Vals);
- unsigned AllNodesNum = (2*Vals.size()) - 1;
+ unsigned AllNodesNum = (2 * Vals.size()) - 1;
// last element in the vec should be the final interleaved result,
// so, skip processing last element.
- AllNodesNum --;
+ AllNodesNum--;
// interleave each 2 consecutive nodes, and push result to the vec,
// so that we can interleave the interleaved results again if we have
// more than 2 vectors to interleave.
- for (unsigned i = 0; i < AllNodesNum; i +=2) {
+ for (unsigned i = 0; i < AllNodesNum; i += 2) {
VectorType *VecTy = cast<VectorType>(Vecs[i]->getType());
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
auto InterleavedVec = Builder.CreateIntrinsic(
- WideVecTy, Intrinsic::experimental_vector_interleave2,
- {Vecs[i], Vecs[i+1]}, /*FMFSource=*/nullptr, Name);
+ WideVecTy, Intrinsic::experimental_vector_interleave2,
+ {Vecs[i], Vecs[i + 1]}, /*FMFSource=*/nullptr, Name);
Vecs.push_back(InterleavedVec);
}
- return Vecs[Vecs.size()-1];
+ return Vecs[Vecs.size() - 1];
}
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2533,7 +2533,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
unsigned Part, Value *MaskForGaps) -> Value * {
if (VF.isScalable()) {
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
- assert(isPowerOf2_32(InterleaveFactor) &&
+ assert(isPowerOf2_32(InterleaveFactor) &&
"Unsupported deinterleave factor for scalable vectors");
auto *BlockInMaskPart = State.get(BlockInMask, Part);
SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
@@ -2586,25 +2586,27 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
if (VecTy->isScalableTy()) {
- assert(isPowerOf2_32(InterleaveFactor) &&
- "Unsupported deinterleave factor for scalable vectors");
+ assert(isPowerOf2_32(InterleaveFactor) &&
+ "Unsupported deinterleave factor for scalable vectors");
for (unsigned Part = 0; Part < UF; ++Part) {
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
-
- std::queue<Value *>Queue;
+
+ std::queue<Value *> Queue;
Queue.push(NewLoads[Part]);
// NonLeaf represents how many times we will do deinterleaving,
- // think of it as a tree, each node will be deinterleaved, untill we reach to
- // the leaf nodes which will be the final results of deinterleaving.
+ // think of it as a tree, each node will be deinterleaved, untill we
+ // reach to the leaf nodes which will be the final results of
+ // deinterleaving.
unsigned NonLeaf = InterleaveFactor - 1;
- for (unsigned i = 0; i < NonLeaf; i ++) {
+ for (unsigned i = 0; i < NonLeaf; i++) {
auto Node = Queue.front();
Queue.pop();
auto DeinterleaveType = Node->getType();
Value *DI = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_deinterleave2, DeinterleaveType, Node,
- /*FMFSource=*/nullptr, "root.strided.vec");
+ Intrinsic::experimental_vector_deinterleave2, DeinterleaveType,
+ Node,
+ /*FMFSource=*/nullptr, "root.strided.vec");
Value *StridedVec1 = Builder.CreateExtractValue(DI, 0);
Value *StridedVec2 = Builder.CreateExtractValue(DI, 1);
Queue.push(StridedVec1);
@@ -2712,7 +2714,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
// Interleave all the smaller vectors into one wider vector.
Value *IVec = interleaveVectors(Builder, StoredVecs, "interleaved.vec");
- //LLVM_DEBUG(dbgs() << "interleaved vec: "; IVec->dump());
+ // LLVM_DEBUG(dbgs() << "interleaved vec: "; IVec->dump());
Instruction *NewStoreInstr;
if (BlockInMask || MaskForGaps) {
Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
@@ -8691,8 +8693,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
// For scalable vectors, the only interleave factor currently supported
- // is a (power of 2) factor, since we require the (de)interleave2 intrinsics instead of
- // shufflevectors, so we can do (de)interleave2 recursively.
+ // is a (power of 2) factor, since we require the (de)interleave2
+ // intrinsics instead of shufflevectors, so we can do (de)interleave2
+ // recursively.
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
"Unsupported interleave factor for scalable vectors");
return Result;
``````````
</details>
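For context on the PR title, the kind of source loop it targets is a factor-4 interleaved access, which SVE can lower to ld4/st4. A minimal sketch (the struct and function names are made up for illustration):

``````````cpp
// Hypothetical example of a factor-4 interleaved loop: each iteration loads
// and stores four adjacent fields, so the loop vectoriser can form an
// interleave group of factor 4, which SVE can implement with ld4/st4.
struct Pixel { float r, g, b, a; };

void scale(Pixel *p, float f, int n) {
  for (int i = 0; i < n; ++i) {
    p[i].r *= f; // the four loads form one strided (deinterleaved) group
    p[i].g *= f;
    p[i].b *= f;
    p[i].a *= f; // the four stores form one interleaved group
  }
}
``````````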
https://github.com/llvm/llvm-project/pull/89018