[llvm] a7b1889 - [SLP]Consider non-inst operands, when checking insts, used outside only
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 26 12:54:19 PDT 2025
Author: Alexey Bataev
Date: 2025-10-26T12:53:48-07:00
New Revision: a7b188983fd804e05a15835c3f0df5fa7523c246
URL: https://github.com/llvm/llvm-project/commit/a7b188983fd804e05a15835c3f0df5fa7523c246
DIFF: https://github.com/llvm/llvm-project/commit/a7b188983fd804e05a15835c3f0df5fa7523c246.diff
LOG: [SLP]Consider non-inst operands, when checking insts, used outside only
If the instructions in the node do not require scheduling and are used
outside the basic block only, we still need to check whether their operands
are non-instructions too. Such nodes should be emitted at the beginning of
the block.
Fixes #165151
Added:
llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cdb9e7e381121..4fcaf6dabb513 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17641,12 +17641,28 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars, [&](Value *V) {
- return isa<PoisonValue>(V) ||
- (E->Idx == 0 && isa<InsertElementInst>(V)) ||
- E->isCopyableElement(V) ||
- (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
- }))
+ (all_of(E->Scalars,
+ [&](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (E->Idx == 0 && isa<InsertElementInst>(V)) ||
+ E->isCopyableElement(V) ||
+ (!isVectorLikeInstWithConstOps(V) &&
+ isUsedOutsideBlock(V));
+ }) &&
+ (!E->doesNotNeedToSchedule() ||
+ any_of(E->Scalars,
+ [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return !areAllOperandsNonInsts(V);
+ }) ||
+ none_of(E->Scalars, [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return MustGather.contains(V);
+ }))))
Res = FindLastInst();
else
Res = FindFirstInst();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
index ff0887cf12447..fbf63230b6edf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
@@ -6,9 +6,9 @@ define void @test(ptr %0, i64 %1, i64 %2) {
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1>
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]]
; CHECK: [[_PREHEADER_LR_PH:.*:]]
; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
index bfb623ac5a9b9..6d713e83bbf4e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
@@ -9,11 +9,11 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
new file mode 100644
index 0000000000000..ec554b4607cce
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: br i1 false, label %[[BB8:.*]], label %[[BB4:.*]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: br label %[[BB8]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB4]] ], [ [[TMP1]], %[[BB1]] ]
+; CHECK-NEXT: ret void
+;
+bb:
+ br label %bb1
+
+bb1:
+ %sub = sub i32 0, %arg
+ %add = add i32 0, 0
+ %add2 = add i32 0, 0
+ %add3 = add i32 0, 0
+ br i1 false, label %bb8, label %bb4
+
+bb4:
+ %add5 = add i32 %add3, 0
+ %add6 = add i32 0, 0
+ %add7 = add i32 0, 0
+ br label %bb8
+
+bb8:
+ %phi = phi i32 [ %sub, %bb4 ], [ %sub, %bb1 ]
+ %phi9 = phi i32 [ %add5, %bb4 ], [ %add, %bb1 ]
+ %phi10 = phi i32 [ %add6, %bb4 ], [ %add2, %bb1 ]
+ %phi11 = phi i32 [ %add7, %bb4 ], [ %add3, %bb1 ]
+ ret void
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
index 7c8cb02f28c63..2623366d2b91e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
@@ -5,11 +5,11 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4
; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
index ef75a8dd99169..88e2cca176666 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
@@ -10,13 +10,13 @@ define i32 @test(i32 %0, i1 %1) {
; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB7:.*]], label %[[BB9:.*]]
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[BB16:.*]]
; CHECK: [[BB9]]:
; CHECK-NEXT: br i1 false, label %[[BB14:.*]], label %[[BB10:.*]]
; CHECK: [[BB10]]:
-; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]])
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[BB14]]
; CHECK: [[BB14]]:
More information about the llvm-commits mailing list