[llvm] [SLP]Change the insertion point for outside-block-used nodes and prevec phi operand gathers (PR #139917)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 07:50:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
Need to set the insertion point for (non-schedulable) vector node after
the last instruction in the node to avoid def-use breakage. But it also
causes miscompilation with gather/buildvector operands of the phi nodes,
used in the same phi only in the block.
These nodes supposed to be inserted at the end of the block and after
changing the insertion point for the non-schedulable vec block, it also
may break def-use dependencies. Need to prevector such nodes, to emit
them as early as possible, so the vectorized nodes are inserted before
these nodes.
Fixes #<!-- -->139728
---
Patch is 20.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139917.diff
10 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+41-15)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll (+5-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll (+4-4)
- (added) llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll (+40)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/revec.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 45cf4e1eac092..04849b86af7ce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16128,16 +16128,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars,
- [](Value *V) {
- return isa<PoisonValue>(V) ||
- (!isVectorLikeInstWithConstOps(V) &&
- isUsedOutsideBlock(V));
- }) ||
- (E->isGather() && E->Idx == 0 && all_of(E->Scalars, [](Value *V) {
- return isa<ExtractElementInst, UndefValue>(V) ||
- areAllOperandsNonInsts(V);
- })))
+ all_of(E->Scalars, [](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
+ }))
Res = FindLastInst();
else
Res = FindFirstInst();
@@ -16193,7 +16187,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
}
if (IsPHI ||
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
- doesNotNeedToSchedule(E->Scalars)) ||
+ all_of(E->Scalars, areAllOperandsNonInsts)) ||
(GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
E->getOpcode() == Instruction::Load)) {
@@ -17782,17 +17776,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *VecOp = NewPhi->getIncomingValueForBlock(IBB);
NewPhi->addIncoming(VecOp, IBB);
TreeEntry *OpTE = getOperandEntry(E, I);
+ assert(!OpTE->VectorizedValue && "Expected no vectorized value.");
OpTE->VectorizedValue = VecOp;
continue;
}
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeOperand(E, I);
+ const TreeEntry *OpE = getOperandEntry(E, I);
+ Value *Vec;
+ if (OpE->isGather()) {
+ assert(OpE->VectorizedValue && "Expected vectorized value.");
+ Vec = OpE->VectorizedValue;
+ if (auto *IVec = dyn_cast<Instruction>(Vec))
+ Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
+ } else {
+ Vec = vectorizeOperand(E, I);
+ }
if (VecTy != Vec->getType()) {
- assert((It != MinBWs.end() || getOperandEntry(E, I)->isGather() ||
- MinBWs.contains(getOperandEntry(E, I))) &&
- "Expected item in MinBWs.");
+ assert(
+ (It != MinBWs.end() || OpE->isGather() || MinBWs.contains(OpE)) &&
+ "Expected item in MinBWs.");
Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I));
}
NewPhi->addIncoming(Vec, IBB);
@@ -18690,6 +18694,28 @@ Value *BoUpSLP::vectorizeTree(
(void)vectorizeTree(TE.get());
}
}
+ // Vectorize gather operands of the PHI nodes.
+ for (const std::unique_ptr<TreeEntry> &TE : reverse(VectorizableTree)) {
+ if (TE->isGather() && TE->UserTreeIndex.UserTE &&
+ TE->UserTreeIndex.UserTE->hasState() &&
+ !TE->UserTreeIndex.UserTE->isAltShuffle() &&
+ TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize &&
+ TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI &&
+ !TE->VectorizedValue) {
+ auto *PH = cast<PHINode>(TE->UserTreeIndex.UserTE->getMainOp());
+ BasicBlock *IBB = PH->getIncomingBlock(TE->UserTreeIndex.EdgeIdx);
+ // If there is the same incoming block earlier - skip, it will be handled
+ // in PHI node.
+ if (TE->UserTreeIndex.EdgeIdx > 0 &&
+ any_of(seq<unsigned>(TE->UserTreeIndex.EdgeIdx), [&](unsigned Idx) {
+ return PH->getIncomingBlock(Idx) == IBB;
+ }))
+ continue;
+ Builder.SetInsertPoint(IBB->getTerminator());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+ (void)vectorizeTree(TE.get());
+ }
+ }
(void)vectorizeTree(VectorizableTree[0].get());
// Run through the list of postponed gathers and emit them, replacing the temp
// emitted allocas with actual vector instructions.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index 295a71899c338..d8ddfee3ed28b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -37,19 +37,19 @@ define void @test() {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP17]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP16]], float [[I67]], i32 6
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I69]], i32 7
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> [[TMP23]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP23]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP30]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
@@ -58,8 +58,8 @@ define void @test() {
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 14, i32 15>
+; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
index e3c134b068e04..773b9c069569d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
@@ -5,14 +5,14 @@ define void @test(ptr %0, float %1) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], float [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[BB8:.*]]
; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ [[TMP15:%.*]], %[[BB8]] ], [ [[TMP5]], [[TMP2:%.*]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP7]], %[[BB8]] ], [ [[TMP4]], [[TMP2]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP7]], %[[BB8]] ], [ [[TMP8]], [[TMP2]] ]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = fmul <4 x float> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
index 2a54ae9a1e749..8ad05694b5322 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
@@ -6,9 +6,9 @@ define i32 @test(i64 %l.549) {
; CHECK-SAME: i64 [[L_549:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV3:%.*]] = sext i32 0 to i64
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[IF_THEN19:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
index 5b936f65a3221..6fa33671a7b53 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
@@ -7,11 +7,11 @@ define void @test() {
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB5:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB1]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 0, i32 0
; CHECK-NEXT: [[TMP3]] = or <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[OR3:%.*]] = or i32 [[TMP6]], 0
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
index 289c6002851d7..5c7dc869395b8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
@@ -37,18 +37,18 @@ define i32 @test(i32 %s.0) {
; CHECK: [[IF_THEN18:.*]]:
; CHECK-NEXT: br label %[[T]]
; CHECK: [[T]]:
-; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
; CHECK: [[IF_END24]]:
-; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
+; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP19]], %[[T]] ]
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
; CHECK-NEXT: br label %[[O]]
; CHECK: [[O]]:
-; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ]
+; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP30]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
new file mode 100644
index 0000000000000..1c482e079bb0f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 0, i32 1
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB5:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP4]] = or <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: br label %[[BB5]]
+; CHECK: [[BB5]]:
+; CHECK-NEXT: br i1 false, label %[[BB6:.*]], label %[[BB1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP2]], %[[BB5]] ]
+; CHECK-NEXT: ret i64 0
+;
+bb:
+ br label %bb1
+
+bb1:
+ %phi = phi i32 [ 0, %bb ], [ %or, %bb5 ]
+ %phi2 = phi i32 [ 0, %bb ], [ %or4, %bb5 ]
+ %or = or i32 %phi, 0
+ %add = add i32 0, 0
+ %or3 = or i32 %add, %phi2
+ %or4 = or i32 %or3, 0
+ br label %bb5
+
+bb5:
+ br i1 false, label %bb6, label %bb1
+
+bb6:
+ %phi7 = phi i32 [ %or, %bb5 ]
+ %phi8 = phi i32 [ %or3, %bb5 ]
+ ret i64 0
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
index 2612a21b9eedf..a08d0b1ab16cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
@@ -16,10 +16,10 @@ define i32 @test(i1 %cond) {
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[OR92]] = or i32 1, 0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
-; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
index 5aa4dba2b8a1b..f2b9d6329bfdf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
@@ -15,8 +15,8 @@ define i64 @Foo(ptr align 8 dereferenceable(344) %0, i64 %1) {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: br label %[[BB16:.*]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i64> [ [[TMP11]], [[TMP2:%.*]] ], [ zeroinitializer, %[[TMP25:.*]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 36dbeed9bbcd5..ebe3eb6b53358 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -234,12 +234,12 @@ define void @test7() {
define void @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> z...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/139917
More information about the llvm-commits
mailing list