[llvm] [SLP]Do not account external uses in EH block and in non-returning blocks (PR #112045)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 14:00:17 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
No need to account for the cost of external uses in EH pads and in
non-returning basic blocks (blocks terminated by an unreachable instruction).
---
Full diff: https://github.com/llvm/llvm-project/pull/112045.diff
7 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+4-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll (+9-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/partail.ll (+14-16)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll (+13-31)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll (+1-3)
- (modified) llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll (+5-8)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9826a8e8f8c678..29ef77db588703 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11748,11 +11748,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
if (EphValues.count(EU.User))
continue;
- // Used in unreachable blocks or in landing pads (rarely executed).
+ // Used in unreachable blocks or in EH pads (rarely executed) or is
+ // terminated with unreachable instruction.
if (BasicBlock *UserParent =
EU.User ? cast<Instruction>(EU.User)->getParent() : nullptr;
UserParent &&
- (!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad()))
+ (!DT->isReachableFromEntry(UserParent) || UserParent->isEHPad() ||
+ isa_and_present<UnreachableInst>(UserParent->getTerminator())))
continue;
// We only add extract cost once for the same scalar.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
index e46c3b94383ffc..c154f5de3c7826 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
@@ -4,12 +4,16 @@
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fsub float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[TMP10:%.*]] = fmul float 0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = fsub float 0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub float [[TMP7]], [[TMP10]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP8]], i32 0
; CHECK-NEXT: unreachable
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
index 0b9ed47ce0f178..154b03c1107ec0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
@@ -13,28 +13,26 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
; CHECK: if.end:
; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
-; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt i32 [[SHR15]], 0
-; CHECK-NEXT: [[COND_I_I:%.*]] = select i1 [[CMP_I_I]], i32 [[SHR15]], i32 0
-; CHECK-NEXT: [[CMP_I4_I:%.*]] = icmp slt i32 [[COND_I_I]], undef
-; CHECK-NEXT: [[COND_I5_I:%.*]] = select i1 [[CMP_I4_I]], i32 [[COND_I_I]], i32 undef
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SUB14]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SHR15]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[TMP2]], <i32 -9, i32 -5, i32 -1, i32 0>
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 undef, i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
+; CHECK-NEXT: [[COND_I5_I:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3
; CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[COND_I5_I]] to i64
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30]]
-; CHECK-NEXT: [[CMP_I_I_1:%.*]] = icmp sgt i32 [[SUB14]], -1
-; CHECK-NEXT: [[COND_I_I_1:%.*]] = select i1 [[CMP_I_I_1]], i32 undef, i32 0
-; CHECK-NEXT: [[CMP_I4_I_1:%.*]] = icmp slt i32 [[COND_I_I_1]], undef
-; CHECK-NEXT: [[COND_I5_I_1:%.*]] = select i1 [[CMP_I4_I_1]], i32 [[COND_I_I_1]], i32 undef
+; CHECK-NEXT: [[COND_I5_I_1:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2
; CHECK-NEXT: [[IDXPROM30_1:%.*]] = sext i32 [[COND_I5_I_1]] to i64
; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_1]]
-; CHECK-NEXT: [[CMP_I_I_2:%.*]] = icmp sgt i32 [[SUB14]], -5
-; CHECK-NEXT: [[COND_I_I_2:%.*]] = select i1 [[CMP_I_I_2]], i32 undef, i32 0
-; CHECK-NEXT: [[CMP_I4_I_2:%.*]] = icmp slt i32 [[COND_I_I_2]], undef
-; CHECK-NEXT: [[COND_I5_I_2:%.*]] = select i1 [[CMP_I4_I_2]], i32 [[COND_I_I_2]], i32 undef
+; CHECK-NEXT: [[COND_I5_I_2:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1
; CHECK-NEXT: [[IDXPROM30_2:%.*]] = sext i32 [[COND_I5_I_2]] to i64
; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_2]]
-; CHECK-NEXT: [[CMP_I_I_3:%.*]] = icmp sgt i32 [[SUB14]], -9
-; CHECK-NEXT: [[COND_I_I_3:%.*]] = select i1 [[CMP_I_I_3]], i32 undef, i32 0
-; CHECK-NEXT: [[CMP_I4_I_3:%.*]] = icmp slt i32 [[COND_I_I_3]], undef
-; CHECK-NEXT: [[COND_I5_I_3:%.*]] = select i1 [[CMP_I4_I_3]], i32 [[COND_I_I_3]], i32 undef
+; CHECK-NEXT: [[COND_I5_I_3:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0
; CHECK-NEXT: [[IDXPROM30_3:%.*]] = sext i32 [[COND_I5_I_3]] to i64
; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_3]]
; CHECK-NEXT: unreachable
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
index 9979bb9170d485..eed772b0dd104e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
@@ -9,7 +9,7 @@ define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 {
; CHECK-LABEL: @test1(
; CHECK-NEXT: invoke.cont:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: [[LOAD1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[P]], align 8
; CHECK-NEXT: invoke void @throw()
; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 590e5a67bd4cef..95006071790ca1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -11,41 +11,23 @@ define void @hoge() {
; CHECK-NEXT: ret void
; CHECK: bb2:
; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15
-; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32
-; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32
-; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]]
-; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef
-; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]]
-; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef
-; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef
-; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15
-; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]]
-; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]]
-; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31
-; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]]
-; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]]
-; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47
-; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]]
-; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 undef, i32 15, i32 31, i32 47>
+; CHECK-NEXT: [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef
; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63
-; CHECK-NEXT: [[T21:%.*]] = sub nsw i32 undef, [[T3]]
-; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef
-; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]]
-; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef
-; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49
-; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33
-; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33
-; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 -49, i32 -33, i32 -33, i32 -17>
+; CHECK-NEXT: [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]])
; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]]
-; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]]
-; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]]
-; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]]
-; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]]
-; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]]
-; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]]
+; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]]
; CHECK-NEXT: unreachable
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
index d9496a3e3e343a..f7935c7af9631b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
@@ -5,11 +5,9 @@ define dso_local void @rftbsub(ptr %a) local_unnamed_addr #0 {
; CHECK-LABEL: @rftbsub(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 2, 1
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[SUB22:%.*]] = fsub double undef, undef
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX12]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
; CHECK-NEXT: [[ADD16:%.*]] = fadd double [[TMP1]], undef
; CHECK-NEXT: [[MUL18:%.*]] = fmul double undef, [[ADD16]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd double undef, [[MUL18]]
diff --git a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
index 9f6b285f1ab90a..372202bd0cbd63 100644
--- a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
@@ -7,9 +7,8 @@
define i16 @D134605() {
; CHECK-LABEL: @D134605(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds [32 x i16], ptr poison, i16 0, i16 3
-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX81]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr poison, align 1
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
; CHECK-NEXT: [[REASS_ADD:%.*]] = add i16 poison, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP2]], 2
@@ -45,12 +44,10 @@ declare i16 @check_i16(i16, i16, i16)
define void @PR58054() {
; CHECK-LABEL: @PR58054(
-; CHECK-NEXT: [[VAL:%.*]] = add i64 poison, poison
-; CHECK-NEXT: [[VAL2:%.*]] = add i64 poison, poison
-; CHECK-NEXT: [[VAL3:%.*]] = mul i64 [[VAL2]], [[VAL]]
-; CHECK-NEXT: [[VAL4:%.*]] = mul i64 [[VAL3]], [[VAL2]]
-; CHECK-NEXT: [[VAL5:%.*]] = mul i64 [[VAL4]], [[VAL2]]
-; CHECK-NEXT: [[VAL7:%.*]] = add i64 [[VAL]], [[VAL5]]
+; CHECK-NEXT: [[VAL3:%.*]] = mul i64 poison, poison
+; CHECK-NEXT: [[VAL4:%.*]] = mul i64 [[VAL3]], poison
+; CHECK-NEXT: [[VAL5:%.*]] = mul i64 [[VAL4]], poison
+; CHECK-NEXT: [[VAL7:%.*]] = add i64 poison, [[VAL5]]
; CHECK-NEXT: [[VAL8:%.*]] = sitofp i64 [[VAL7]] to double
; CHECK-NEXT: call void @wibble(i32 poison, double [[VAL8]], i64 poison)
; CHECK-NEXT: unreachable
``````````
</details>
https://github.com/llvm/llvm-project/pull/112045
More information about the llvm-commits
mailing list