[llvm] [SLP] Create groups before sorting Phis (PR #111174)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 08:32:44 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nabeel Omer (omern1)
<details>
<summary>Changes</summary>
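This fixes the comparator `PHICompare`, whose results were not transitive, by first partitioning the PHIs into groups of mutually comparable instructions and then sorting within each group, so that `std::sort` is never given a comparator that violates strict weak ordering.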
This was discussed in https://discourse.llvm.org/t/slp-vectorizer-phi-sorting-produces-different-results-between-platforms/81467/18.
---
Patch is 54.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111174.diff
21 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+128-28)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll (+11-10)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll (+26-28)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll (+11-7)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll (+3-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll (+4-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll (+7-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll (+3-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll (+52-26)
- (modified) llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll (+8-8)
- (modified) llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dee0b7e1f43714..1cfdf24ede765a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5441,40 +5441,140 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
- auto PHICompare = [&](unsigned I1, unsigned I2) {
- Value *V1 = TE.Scalars[I1];
- Value *V2 = TE.Scalars[I2];
- if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0))
- return false;
- if (V1->getNumUses() < V2->getNumUses())
- return true;
- if (V1->getNumUses() > V2->getNumUses())
- return false;
- auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
- auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
- if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
- if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
- if (!areTwoInsertFromSameBuildVector(
- IE1, IE2,
- [](InsertElementInst *II) { return II->getOperand(0); }))
- return I1 < I2;
- return getElementIndex(IE1) < getElementIndex(IE2);
- }
- if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
- if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
- if (EE1->getOperand(0) != EE2->getOperand(0))
- return I1 < I2;
- return getElementIndex(EE1) < getElementIndex(EE2);
- }
- return I1 < I2;
- };
DenseMap<unsigned, unsigned> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
std::iota(Phis.begin(), Phis.end(), 0);
+
+ BitVector Seen(Phis.size());
+ SmallVector<SmallVector<unsigned>> Groups;
+ Groups.resize(Phis.size(), {});
+
+ for (auto const Phidx : Phis) {
+ // We've already found a group for this Phidx
+ if (Seen.test(Phidx))
+ continue;
+
+ Groups[Phidx].push_back(Phidx);
+
+ auto UserIterPhi1 = TE.Scalars[Phidx]->user_begin();
+ if (UserIterPhi1.atEnd())
+ continue;
+
+ auto *FirstUserOfPhi1 = cast<Instruction>(*UserIterPhi1);
+
+ unsigned Count = 0;
+ if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1)) {
+ auto Width = IE1->getType()->getElementCount();
+ assert(!Width.isScalable());
+ Count = Width.getFixedValue() - 1;
+ } else if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1)) {
+ // Count is unused in the case of extract element instructions.
+ Count = -1;
+ }
+
+ for (auto const PhidxB : ArrayRef(Phis).drop_front(Phidx + 1)) {
+
+ // At this point we know that we have found all the elements that fit in
+ // this group so we'll stop.
+ if (Count == 0)
+ break;
+
+ // B is already in a group so we don't want to look at it again.
+ if (Seen.test(PhidxB))
+ continue;
+
+ auto UserIterPhi2 = TE.Scalars[PhidxB]->user_begin();
+ if (UserIterPhi2.atEnd()) {
+ Seen.set(PhidxB);
+ continue;
+ }
+
+ auto *FirstUserOfPhi2 = cast<Instruction>(*UserIterPhi2);
+
+ if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
+ if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1)) {
+ if (areTwoInsertFromSameBuildVector(
+ IE1, IE2,
+ [](InsertElementInst *II) { return II->getOperand(0); })) {
+ Groups[Phidx].push_back(PhidxB);
+ Seen.set(PhidxB);
+ --Count;
+ continue;
+ }
+ }
+ } else if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1)) {
+ if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
+ if (EE1->getVectorOperand() == EE2->getVectorOperand()) {
+ Groups[Phidx].push_back(PhidxB);
+ Seen.set(PhidxB);
+ continue;
+ }
+ }
+ }
+ }
+
+ std::sort(
+ Groups[Phidx].begin(), Groups[Phidx].end(),
+ [TE](unsigned I1, unsigned I2) {
+ auto U1 = TE.Scalars[I1]->user_begin();
+ if (U1.atEnd())
+ return false;
+ auto U2 = TE.Scalars[I2]->user_begin();
+ if (U2.atEnd())
+ return true;
+
+ if (auto *FirstUserOfPhi1 =
+ dyn_cast<InsertElementInst>(*(U1))) {
+ if (auto *FirstUserOfPhi2 = dyn_cast<InsertElementInst>(
+ *(U2))) {
+ return getElementIndex(FirstUserOfPhi1) <
+ getElementIndex(FirstUserOfPhi2);
+ }
+ }
+
+ if (auto *FirstUserOfPhi1 =
+ dyn_cast<ExtractElementInst>(*(U1))) {
+ if (auto *FirstUserOfPhi2 = dyn_cast<ExtractElementInst>(
+ *(U2))) {
+ return FirstUserOfPhi1->getIndexOperand() <
+ FirstUserOfPhi2->getIndexOperand();
+ }
+ }
+
+ llvm_unreachable(
+ "Found something other than InsertElement or ExtractElement");
+ });
+ }
+
+ // Sort the groups on the basis of their ordering in the block.
+ std::sort(Groups.begin(), Groups.end(),
+ [TE](SmallVector<unsigned> I1, SmallVector<unsigned> I2) {
+ if (I1.empty())
+ return false;
+ if (I2.empty())
+ return true;
+
+ auto PhidxA = I1.front();
+ auto *InstA = TE.Scalars[PhidxA];
+ auto *Phi1 = cast<Instruction>(InstA);
+
+ auto PhidxB = I2.front();
+ auto *InstB = TE.Scalars[PhidxB];
+ auto *Phi2 = cast<Instruction>(InstB);
+
+ return Phi1->comesBefore(Phi2);
+ });
+
OrdersType ResOrder(TE.Scalars.size());
for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id)
PhiToId[Id] = Id;
- stable_sort(Phis, PHICompare);
+
+ Phis.clear();
+ for (auto const &group : Groups) {
+ for (auto const element : group)
+ Phis.push_back(element);
+ }
+
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
ResOrder[Id] = PhiToId[Phis[Id]];
if (isIdentityOrder(ResOrder))
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
index e4b6c06b79fc1b..4b99e2a8ff7958 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
-; YAML-NEXT: - Cost: '2'
+; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
@@ -28,7 +28,7 @@
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
-; YAML-NEXT: - Cost: '2'
+; YAML-NEXT: - Cost: '3'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '9'
@@ -45,36 +45,37 @@ define void @foo() personality ptr @bar {
; CHECK: bb3:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i64> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 0, i64 0, i64 poison) [ "deopt"() ]
-; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]]
+; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]]
; CHECK: bb4:
; CHECK-NEXT: br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]]
; CHECK: bb5:
; CHECK-NEXT: br label [[BB7:%.*]]
; CHECK: bb6:
-; CHECK-NEXT: [[TMP4]] = phi <2 x i64> [ <i64 0, i64 poison>, [[BB8:%.*]] ]
+; CHECK-NEXT: [[TMP4]] = phi <2 x i64> [ <i64 poison, i64 0>, [[BB8:%.*]] ]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb7:
; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i64 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_5_84111]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 poison, i64 poison, i64 poison) [ "deopt"() ]
-; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]]
+; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]]
; CHECK: bb8:
; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]]
; CHECK: bb9:
; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT: [[TMP7]] = phi <2 x i64> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP7]] = phi <2 x i64> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb10:
-; CHECK-NEXT: [[TMP8]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ]
; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i64 }
-; CHECK-NEXT: cleanup
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb11:
; CHECK-NEXT: ret void
; CHECK: bb12:
-; CHECK-NEXT: [[TMP9]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ]
+; CHECK-NEXT: [[TMP10]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ]
; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i64 }
-; CHECK-NEXT: cleanup
+; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[BB9]]
;
bb1:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index dbc4f3d59d4f9b..6f63b2a8f9aac1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -13,6 +13,7 @@ define void @test() {
; CHECK-NEXT: br label %[[BB64]]
; CHECK: [[BB64]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16
; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4
; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8
@@ -25,48 +26,45 @@ define void @test() {
; CHECK-NEXT: [[I75:%.*]] = load float, ptr poison, align 16
; CHECK-NEXT: [[I76:%.*]] = load float, ptr poison, align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x float> poison, float [[I76]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP2]], float [[I74]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I73]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I71]], i32 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I70]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I74]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I73]], i32 5
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I72]], i32 6
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I71]], i32 7
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I70]], i32 8
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I69]], i32 10
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I68]], i32 13
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 14
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x float> [[TMP11]], float [[I66]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP12]], <16 x float> poison, <8 x i32> <i32 poison, i32 15, i32 8, i32 13, i32 10, i32 14, i32 14, i32 14>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 1, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP14]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP32:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 5, i32 1, i32 2, i32 3, i32 4, i32 1, i32 0, i32 3, i32 5, i32 1, i32 0, i32 3, i32 5, i32 1>
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 2, i32 poison, i32 4, i32 poison, i32 0, i32 poison, i32 5, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 2, i32 poison, i32 4, i32 poison>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 16, i32 2, i32 17, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14, i32 poison>
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 1, i32 6, i32 3, i32 8, i32 1, i32 10, i32 3, i32 12, i32 1, i32 14, i32 3>
+; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[TMP2]], i64 4)
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
-; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
+; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 15, i32 8, i32 13, i32 10, i32 14, i32 14, i32 14>
+; CHECK-NEXT: [[TMP32]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 9, i32 11>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
-; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
+; CHECK-NEXT: [[TMP30:%.*]] = phi <16 x float> [ [[TMP12]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP30]], i32 14
; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP30]], i32 6
; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP30]], i32 10
; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
; CHECK: [[BB184]]:
; CHECK-NEXT: br label %[[BB185:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 43c42c1ea2bfb5..0b948d78821837 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -13,11 +13,11 @@ define void @test() {
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/111174
More information about the llvm-commits
mailing list