[llvm] [LV][AArch64]: Utilise SVE ld4/st4 instructions via auto-vectorisation (PR #89018)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 17 03:41:34 PDT 2024
================
@@ -2572,23 +2586,40 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
if (VecTy->isScalableTy()) {
- assert(InterleaveFactor == 2 &&
- "Unsupported deinterleave factor for scalable vectors");
-
+ assert(isPowerOf2_32(InterleaveFactor) &&
+ "Unsupported deinterleave factor for scalable vectors");
for (unsigned Part = 0; Part < UF; ++Part) {
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
- Value *DI = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_deinterleave2, VecTy, NewLoads[Part],
- /*FMFSource=*/nullptr, "strided.vec");
+
+ std::queue<Value *>Queue;
+ Queue.push(NewLoads[Part]);
+ // NonLeaf represents how many times we will do deinterleaving,
+ // think of it as a tree, each node will be deinterleaved, untill we reach to
+ // the leaf nodes which will be the final results of deinterleaving.
+ unsigned NonLeaf = InterleaveFactor - 1;
+ for (unsigned i = 0; i < NonLeaf; i ++) {
+ auto Node = Queue.front();
+ Queue.pop();
+ auto DeinterleaveType = Node->getType();
+ Value *DI = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_deinterleave2, DeinterleaveType, Node,
+ /*FMFSource=*/nullptr, "root.strided.vec");
+ Value *StridedVec1 = Builder.CreateExtractValue(DI, 0);
+ Value *StridedVec2 = Builder.CreateExtractValue(DI, 1);
+ Queue.push(StridedVec1);
+ Queue.push(StridedVec2);
+ }
+
unsigned J = 0;
- for (unsigned I = 0; I < InterleaveFactor; ++I) {
+ for (unsigned I = 0; I < InterleaveFactor && !Queue.empty(); ++I) {
Instruction *Member = Group->getMember(I);
if (!Member)
continue;
- Value *StridedVec = Builder.CreateExtractValue(DI, I);
+ auto StridedVec = Queue.front();
+ Queue.pop();
----------------
Mel-Chen wrote:
Here is an example:
A vector 0 1 2 3 4 5 6 7
If we deinterleave the vector with a factor of 4, we should get:
member 0: 0 4
member 1: 1 5
member 2: 2 6
member 3: 3 7
But the Queue in your change may look like this: 0 4, 2 6, 1 5, 3 7.
Please confirm that the Queue is ordered so that its entries match the member indices.
https://github.com/llvm/llvm-project/pull/89018
More information about the llvm-commits
mailing list