[llvm] [SLP] Fix condition for avoiding scheduling of instructions (PR #70035)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 05:49:47 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nabeel Omer (omern1)
<details>
<summary>Changes</summary>
To correctly avoid scheduling, we must ensure that the instruction isn't used in the same basic block AND its operands are not instructions themselves.
Fixes #<!-- -->70004.
---
Patch is 63.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70035.diff
26 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll (+22-12)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll (+11-11)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll (+10-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll (+18-18)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll (+13-13)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll (+10-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll (+3-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/multi-uses-with-deps-in-first.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll (+9-9)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll (+10-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll (+10-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll (+11-11)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll (+7-7)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll (+5-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reused-extracts.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll (+8-8)
- (added) llvm/test/Transforms/SLPVectorizer/X86/scheduling-cond.ll (+44)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll (+3-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll (+11-11)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f694ab88ccab8ea..3e3de5d9b4bf3ff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1084,7 +1084,7 @@ static bool doesNotNeedToBeScheduled(Value *V) {
/// in other basic blocks.
static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
return !VL.empty() &&
- (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
+ (all_of(VL, doesNotNeedToBeScheduled));
}
namespace slpvectorizer {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 8987f34e561e6fb..b48e7e08134be23 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -35,14 +35,19 @@ define void @PR28330(i32 %n) {
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
-; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
+; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1
+; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[TMP2]], zeroinitializer
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; MAX-COST-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; MAX-COST-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; MAX-COST-NEXT: [[OP_RDX1]] = add i32 [[OP_RDX]], [[P17]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
@@ -112,14 +117,19 @@ define void @PR32038(i32 %n) {
;
; MAX-COST-LABEL: @PR32038(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
-; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
+; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1
+; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[TMP2]], zeroinitializer
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; MAX-COST-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; MAX-COST-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; MAX-COST-NEXT: [[OP_RDX1]] = add i32 [[OP_RDX]], -5
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
index 1198bb1d509ebbd..d4b9b01b6983b18 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
@@ -40,8 +40,8 @@ declare float @llvm.fabs.f32(float)
define <4 x float> @insertelement_poison_lanes(ptr %0) {
; CHECK-LABEL: @insertelement_poison_lanes(
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
-; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0:%.*]], i64 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
; CHECK-NEXT: ret <4 x float> [[INS_2]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll
index 0b26c53ca45030d..608066d75698c4a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll
@@ -215,8 +215,8 @@ define float @slp_not_profitable_in_loop(float %x, ptr %A) {
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[MUL12:%.*]] = fmul fast float 3.000000e+00, [[L_1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[MUL16:%.*]] = fmul fast float 3.000000e+00, [[L_3]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL12]], [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
index f3638b5d087f4c1..a2dd0a8ba1659d9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -8,18 +8,18 @@ define void @test() #0 {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 2, i64 3, i64 1, i64 0>
-; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP4:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
-; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]]
-; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP7]], [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], <i64 2, i64 3, i64 1, i64 0>
+; CHECK-NEXT: [[TMP4]] = extractelement <4 x i64> [[TMP3]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0
; CHECK-NEXT: br label [[LOOP]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index 2ea7f191947b43f..38ba06f7f5cddca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -7,17 +7,17 @@ define void @mainTest(i32 %param, ptr %vals, i32 %len) {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 31>, i32 [[PARAM:%.*]], i32 0
; CHECK-NEXT: br label [[BCI_15:%.*]]
; CHECK: bci_15:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 -1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0
; CHECK-NEXT: store atomic i32 [[TMP4]], ptr [[VALS:%.*]] unordered, align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP5]], [[TMP2]]
-; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[V44]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP2]], <i32 -1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP5]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP3]], 16
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
+; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[V44]], i32 1
; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
index 001da64c60a93f2..0237f699e40427b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
@@ -9,28 +9,28 @@ define double @test() {
; CHECK: cond.true:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP10]], [[TMP2]]
+; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP10]], [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x double> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP18]], [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = fsub <2 x double> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x double> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP21]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP20]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
; CHECK-NEXT: [[ADD29:%.*]] = fadd double [[TMP25]], [[TMP26]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
index 18d9f8c903c5b4a..90a761dcd996b85 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -4,20 +4,12 @@
define void @exceed(double %0, double %1) {
; CHECK-LABEL: @exceed(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef
; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef
-; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef
; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef
@@ -27,15 +19,23 @@ define void @exceed(double %0, double %1) {
; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
+; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef
+; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]]
-; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP13]], undef
+; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [
; CHECK-NEXT: i32 0, label [[BB2:%.*]]
; CHECK-NEXT: ]
diff --git a/llvm/test/Transfo...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70035
More information about the llvm-commits
mailing list