[llvm] [SLP]Enable float point math ops as copyables elements. (PR #169857)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 27 13:45:03 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
This patch enables support for floating-point math operations as base
instructions for copyable elements. It also fixes some scheduling
issues found during testing.
---
Patch is 79.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169857.diff
16 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+26-23)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll (+3-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll (+6-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll (+8-14)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll (+20-15)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll (+1-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll (+9-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/multi-node-user-with-copyable-ops.ll (+10-9)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/non-commutative-op-in-commutative-inst.ll (+5-7)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll (+4-20)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/user-with-multi-copyable-ops.ll (+18-26)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll (+26-102)
- (modified) llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll (+6-5)
- (modified) llvm/test/Transforms/SLPVectorizer/crash_exceed_scheduling.ll (+92-46)
- (modified) llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll (+34-38)
- (modified) llvm/test/Transforms/SLPVectorizer/insertelement-postpone.ll (+20-20)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3b36ccbd677dc..c3f8689e62d53 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5325,13 +5325,14 @@ class slpvectorizer::BoUpSLP {
if (ScheduleCopyableDataMap.empty())
return false;
SmallDenseMap<TreeEntry *, unsigned> PotentiallyReorderedEntriesCount;
- SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount;
+ ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
+ if (Entries.empty())
+ return false;
+ unsigned CurNumOps = 0;
for (const Use &U : User->operands()) {
if (U.get() != Op)
continue;
- ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
- if (Entries.empty())
- return false;
+ ++CurNumOps;
// Check all tree entries, if they have operands replaced by copyable
// data.
for (TreeEntry *TE : Entries) {
@@ -5363,28 +5364,28 @@ class slpvectorizer::BoUpSLP {
// reordered.
// Same applies even for non-commutative cmps, because we can invert
// their predicate potentially and, thus, reorder the operands.
+ constexpr unsigned NumCommutativeOps = 2;
bool IsCommutativeUser =
- ::isCommutative(User) ||
- ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
- if (!IsCommutativeUser && !isa<CmpInst>(User)) {
- unsigned &OpCnt =
- OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
+ U.getOperandNo() < NumCommutativeOps &&
+ (::isCommutative(User) ||
+ ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User));
+ bool IsCommutativeWithSameOps =
+ IsCommutativeUser &&
+ User->getNumOperands() >= NumCommutativeOps &&
+ User->getOperand(0) == User->getOperand(1);
+ if ((!IsCommutativeUser || IsCommutativeWithSameOps) &&
+ !isa<CmpInst>(User)) {
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op))
+ if (CurNumOps != NumOps || getScheduleCopyableData(EI, Op))
continue;
- // Found copyable operand - continue.
- OpCnt += Inc;
- continue;
+ return false;
}
PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
.first->getSecond() += Inc;
}
}
if (PotentiallyReorderedEntriesCount.empty())
- return all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
- });
+ return true;
// Check the commutative/cmp entries.
for (auto &P : PotentiallyReorderedEntriesCount) {
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
@@ -5430,10 +5431,6 @@ class slpvectorizer::BoUpSLP {
return all_of(PotentiallyReorderedEntriesCount,
[&](const std::pair<const TreeEntry *, unsigned> &P) {
return P.second == NumOps - 1;
- }) &&
- all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
});
}
@@ -5655,6 +5652,7 @@ class slpvectorizer::BoUpSLP {
}
};
+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
for (ScheduleBundle *Bundle : Bundles) {
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
break;
@@ -5662,7 +5660,6 @@ class slpvectorizer::BoUpSLP {
// Need to search for the lane since the tree entry can be
// reordered.
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
- SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
bool IsNonSchedulableWithParentPhiNode =
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
Bundle->getTreeEntry()->UserTreeIndex &&
@@ -10865,7 +10862,9 @@ class InstructionsCompatibilityAnalysis {
Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Opcode == Instruction::And || Opcode == Instruction::Or ||
- Opcode == Instruction::Xor;
+ Opcode == Instruction::Xor || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::FSub || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FDiv;
}
/// Identifies the best candidate value, which represents main opcode
@@ -11198,6 +11197,10 @@ class InstructionsCompatibilityAnalysis {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
index 0783a28f56d85..961662c664a31 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
@@ -11,10 +11,10 @@ define void @p(double %0) {
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <2 x i32> <i32 1, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double poison, double poison>, <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x double> zeroinitializer, [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
index 0cc4d3db5c537..1abc16da77c8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
@@ -4,15 +4,16 @@
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: [[BB:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> <float poison, float 0.000000e+00, float poison, float poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP9:%.*]], %[[BB1]] ]
-; CHECK-NEXT: [[FMUL:%.*]] = fmul float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[FMUL:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[SITOFP:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[SITOFP]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 0, i32 0, i32 poison, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[FMUL]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP0]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index d13a8578d1e00..c1cc3f2dfc9e5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -7,36 +7,30 @@
define void @main(i1 %arg) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: unreachable
; CHECK: cond.end:
; CHECK-NEXT: br label [[INVOKE_CONT:%.*]]
; CHECK: invoke.cont:
-; CHECK-NEXT: br i1 %arg, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
+; CHECK-NEXT: br i1 [[ARG]], label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
; CHECK: arrayctor.cont:
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]]
; CHECK: for.cond36.preheader:
-; CHECK-NEXT: br i1 %arg, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
; CHECK: cond.false51.us:
; CHECK-NEXT: unreachable
; CHECK: cond.true48.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
; CHECK: cond.false66.us:
-; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02)
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
-; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
+; CHECK-NEXT: store <2 x double> <double 0x404900049667B5F2, double 0x404E0515D587DA7B>, ptr undef, align 8
+; CHECK-NEXT: store <2 x double> <double 2.000000e-07, double 0x3F91A436DC4B6CE6>, ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
; CHECK-NEXT: ret void
; CHECK: cond.true63.us:
; CHECK-NEXT: unreachable
; CHECK: for.body42.lr.ph.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
; CHECK: _Z5clampd.exit.1:
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER]]
;
@@ -96,7 +90,7 @@ _Z5clampd.exit.1:
define void @test(i1 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: store <2 x double> <double 0x3FFA356C1D8A7F76, double 0x3FFDC4F38B38BEF4>, ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
index 6d713e83bbf4e..ca65ff88a4b81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
@@ -9,33 +9,38 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[DIV_2_I_I:%.*]] = fmul float [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP20]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP11]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ [[TMP8]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x float> [ zeroinitializer, %[[IF_THEN]] ], [ <float 0x7FF8000000000000, float 1.000000e+00>, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 0x7FF8000000000000, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 1.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[FA_SROA_9_0:%.*]] = phi float [ [[DIV_2_I_I]], %[[IF_THEN]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x float> [ [[TMP10]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP22]], float [[FA_SROA_9_0]], i32 1
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP28]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x float> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x float> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[CALL25:%.*]] = load volatile ptr, ptr null, align 8
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x float> [[TMP18]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x float> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x float> <float 1.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP30]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP32:%.*]] = fmul <4 x float> <float -0.000000e+00, float -0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP31]]
+; CHECK-NEXT: [[TMP26:%.*]] = fadd <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP32]]
; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP28]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]]
; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
index 6942df532ae29..91ec61b275205 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
@@ -25,8 +25,7 @@ define void @foo(double %i) {
; CHECK-NEXT: [[TMP20:%.*]] = fmul double 0.000000e+00, [[I82]]
; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, double [[I82]], i32 3
-; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>
; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
index a07e617384e09..fd7f0c61b6737 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169857
More information about the llvm-commits
mailing list