[llvm] f74879c - [SLP]Make PHICompare comparator follow weak strict ordering requirement
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 11:23:51 PDT 2024
Author: Alexey Bataev
Date: 2024-10-04T14:23:48-04:00
New Revision: f74879cf0cf3e6d1f4c510627a7343ab09485e98
URL: https://github.com/llvm/llvm-project/commit/f74879cf0cf3e6d1f4c510627a7343ab09485e98
DIFF: https://github.com/llvm/llvm-project/commit/f74879cf0cf3e6d1f4c510627a7343ab09485e98.diff
LOG: [SLP]Make PHICompare comparator follow weak strict ordering requirement
Reviewers: efriedma-quic
Reviewed By: efriedma-quic
Pull Request: https://github.com/llvm/llvm-project/pull/110529
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6dd6805a017e4..dc9ad5335f8a52 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5448,6 +5448,33 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
+ SmallVector<Instruction *> UserBVHead(TE.Scalars.size());
+ for (auto [I, V] : zip(UserBVHead, TE.Scalars)) {
+ if (!V->hasNUsesOrMore(1))
+ continue;
+ auto *II = dyn_cast<InsertElementInst>(*V->user_begin());
+ if (!II)
+ continue;
+ Instruction *BVHead = nullptr;
+ BasicBlock *BB = II->getParent();
+ while (II && II->hasOneUse() && II->getParent() == BB) {
+ BVHead = II;
+ II = dyn_cast<InsertElementInst>(II->getOperand(0));
+ }
+ I = BVHead;
+ }
+
+ auto CompareByBasicBlocks = [&](BasicBlock *BB1, BasicBlock *BB2) {
+ assert(BB1 != BB2 && "Expected different basic blocks.");
+ auto *NodeA = DT->getNode(BB1);
+ auto *NodeB = DT->getNode(BB2);
+ assert(NodeA && "Should only process reachable instructions");
+ assert(NodeB && "Should only process reachable instructions");
+ assert((NodeA == NodeB) ==
+ (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
+ };
auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
@@ -5459,21 +5486,56 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return false;
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
- if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
- if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
- if (!areTwoInsertFromSameBuildVector(
- IE1, IE2,
- [](InsertElementInst *II) { return II->getOperand(0); }))
- return I1 < I2;
+ if (FirstUserOfPhi1->getParent() != FirstUserOfPhi2->getParent())
+ return CompareByBasicBlocks(FirstUserOfPhi1->getParent(),
+ FirstUserOfPhi2->getParent());
+ auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1);
+ auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2);
+ auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1);
+ auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2);
+ if (IE1 && !IE2)
+ return true;
+ if (!IE1 && IE2)
+ return false;
+ if (IE1 && IE2) {
+ if (UserBVHead[I1] && !UserBVHead[I2])
+ return true;
+ if (!UserBVHead[I1])
+ return false;
+ if (UserBVHead[I1] == UserBVHead[I2])
return getElementIndex(IE1) < getElementIndex(IE2);
- }
- if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
- if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
- if (EE1->getOperand(0) != EE2->getOperand(0))
- return I1 < I2;
+ if (UserBVHead[I1]->getParent() != UserBVHead[I2]->getParent())
+ return CompareByBasicBlocks(UserBVHead[I1]->getParent(),
+ UserBVHead[I2]->getParent());
+ return UserBVHead[I1]->comesBefore(UserBVHead[I2]);
+ }
+ if (EE1 && !EE2)
+ return true;
+ if (!EE1 && EE2)
+ return false;
+ if (EE1 && EE2) {
+ auto *Inst1 = dyn_cast<Instruction>(EE1->getOperand(0));
+ auto *Inst2 = dyn_cast<Instruction>(EE2->getOperand(0));
+ auto *P1 = dyn_cast<Argument>(EE1->getOperand(0));
+ auto *P2 = dyn_cast<Argument>(EE2->getOperand(0));
+ if (!Inst2 && !P2)
+ return Inst1 || P1;
+ if (EE1->getOperand(0) == EE2->getOperand(0))
return getElementIndex(EE1) < getElementIndex(EE2);
+ if (!Inst1 && Inst2)
+ return false;
+ if (Inst1 && Inst2) {
+ if (Inst1->getParent() != Inst2->getParent())
+ return CompareByBasicBlocks(Inst1->getParent(), Inst2->getParent());
+ return Inst1->comesBefore(Inst2);
}
- return I1 < I2;
+ if (!P1 && P2)
+ return false;
+ assert(P1 && P2 &&
+ "Expected either instructions or arguments vector operands.");
+ return P1->getArgNo() < P2->getArgNo();
+ }
+ return false;
};
SmallDenseMap<unsigned, unsigned, 16> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index dbc4f3d59d4f9b..d6073ea4bbbae6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -33,40 +33,40 @@ define void @test() {
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I67]], i32 14
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 6, i32 7, i32 7>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
; CHECK: [[BB184]]:
; CHECK-NEXT: br label %[[BB185:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
index 34c068478c5f5e..d4b737a6bc4211 100644
--- a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
@@ -10,9 +10,9 @@ define i1 @test() {
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: br label [[ELSE1:%.*]]
; CHECK: else1:
More information about the llvm-commits mailing list