[llvm] [SLP] Do not account for external uses in EH blocks and in non-returning blocks (PR #112045)

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 07:15:24 PDT 2024


https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/112045

>From 6a06131973ea10e1407a3662567fed410875f405 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 11 Oct 2024 20:54:25 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  6 ++-
 .../X86/insertelements-with-reused-indices.ll | 14 +++---
 .../Transforms/SLPVectorizer/X86/partail.ll   | 30 ++++++-------
 .../Transforms/SLPVectorizer/X86/pr27163.ll   |  2 +-
 .../SLPVectorizer/X86/reorder_repeated_ops.ll | 44 ++++++-------------
 .../SLPVectorizer/X86/slp-throttle.ll         |  4 +-
 .../SLPVectorizer/scalarization-overhead.ll   | 13 +++---
 7 files changed, 47 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9826a8e8f8c678..29ef77db588703 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11748,11 +11748,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
     if (EphValues.count(EU.User))
       continue;
 
-    // Used in unreachable blocks or in landing pads (rarely executed).
+    // Used in unreachable blocks, in EH pads (rarely executed), or in blocks
+    // terminated with an unreachable instruction.
     if (BasicBlock *UserParent =
             EU.User ? cast<Instruction>(EU.User)->getParent() : nullptr;
         UserParent &&
-        (!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad()))
+        (!DT->isReachableFromEntry(UserParent) || UserParent->isEHPad() ||
+         isa_and_present<UnreachableInst>(UserParent->getTerminator())))
       continue;
 
     // We only add extract cost once for the same scalar.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
index e46c3b94383ffc..c154f5de3c7826 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
@@ -4,12 +4,16 @@
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul float 0.000000e+00, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = fsub float 0.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fsub float [[TMP7]], [[TMP10]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP8]], i32 0
 ; CHECK-NEXT:    unreachable
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
index 0b9ed47ce0f178..154b03c1107ec0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
@@ -13,28 +13,26 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
 ; CHECK-NEXT:    [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
-; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp sgt i32 [[SHR15]], 0
-; CHECK-NEXT:    [[COND_I_I:%.*]] = select i1 [[CMP_I_I]], i32 [[SHR15]], i32 0
-; CHECK-NEXT:    [[CMP_I4_I:%.*]] = icmp slt i32 [[COND_I_I]], undef
-; CHECK-NEXT:    [[COND_I5_I:%.*]] = select i1 [[CMP_I4_I]], i32 [[COND_I_I]], i32 undef
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SUB14]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SHR15]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[TMP2]], <i32 -9, i32 -5, i32 -1, i32 0>
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 undef, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
+; CHECK-NEXT:    [[COND_I5_I:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3
 ; CHECK-NEXT:    [[IDXPROM30:%.*]] = sext i32 [[COND_I5_I]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30]]
-; CHECK-NEXT:    [[CMP_I_I_1:%.*]] = icmp sgt i32 [[SUB14]], -1
-; CHECK-NEXT:    [[COND_I_I_1:%.*]] = select i1 [[CMP_I_I_1]], i32 undef, i32 0
-; CHECK-NEXT:    [[CMP_I4_I_1:%.*]] = icmp slt i32 [[COND_I_I_1]], undef
-; CHECK-NEXT:    [[COND_I5_I_1:%.*]] = select i1 [[CMP_I4_I_1]], i32 [[COND_I_I_1]], i32 undef
+; CHECK-NEXT:    [[COND_I5_I_1:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2
 ; CHECK-NEXT:    [[IDXPROM30_1:%.*]] = sext i32 [[COND_I5_I_1]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_1]]
-; CHECK-NEXT:    [[CMP_I_I_2:%.*]] = icmp sgt i32 [[SUB14]], -5
-; CHECK-NEXT:    [[COND_I_I_2:%.*]] = select i1 [[CMP_I_I_2]], i32 undef, i32 0
-; CHECK-NEXT:    [[CMP_I4_I_2:%.*]] = icmp slt i32 [[COND_I_I_2]], undef
-; CHECK-NEXT:    [[COND_I5_I_2:%.*]] = select i1 [[CMP_I4_I_2]], i32 [[COND_I_I_2]], i32 undef
+; CHECK-NEXT:    [[COND_I5_I_2:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1
 ; CHECK-NEXT:    [[IDXPROM30_2:%.*]] = sext i32 [[COND_I5_I_2]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_2]]
-; CHECK-NEXT:    [[CMP_I_I_3:%.*]] = icmp sgt i32 [[SUB14]], -9
-; CHECK-NEXT:    [[COND_I_I_3:%.*]] = select i1 [[CMP_I_I_3]], i32 undef, i32 0
-; CHECK-NEXT:    [[CMP_I4_I_3:%.*]] = icmp slt i32 [[COND_I_I_3]], undef
-; CHECK-NEXT:    [[COND_I5_I_3:%.*]] = select i1 [[CMP_I4_I_3]], i32 [[COND_I_I_3]], i32 undef
+; CHECK-NEXT:    [[COND_I5_I_3:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0
 ; CHECK-NEXT:    [[IDXPROM30_3:%.*]] = sext i32 [[COND_I5_I_3]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_3]]
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
index 9979bb9170d485..eed772b0dd104e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
@@ -9,7 +9,7 @@ define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  invoke.cont:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:    [[LOAD1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP0]], ptr [[P]], align 8
 ; CHECK-NEXT:    invoke void @throw()
 ; CHECK-NEXT:            to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 590e5a67bd4cef..95006071790ca1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -11,41 +11,23 @@ define void @hoge() {
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[T:%.*]] = select i1 undef, i16 undef, i16 15
-; CHECK-NEXT:    [[T3:%.*]] = sext i16 undef to i32
-; CHECK-NEXT:    [[T4:%.*]] = sext i16 [[T]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = sub nsw i32 undef, [[T4]]
-; CHECK-NEXT:    [[T6:%.*]] = sub i32 [[T5]], undef
-; CHECK-NEXT:    [[T7:%.*]] = sub nsw i32 63, [[T3]]
-; CHECK-NEXT:    [[T8:%.*]] = sub i32 [[T7]], undef
-; CHECK-NEXT:    [[T9:%.*]] = add i32 [[T8]], undef
-; CHECK-NEXT:    [[T10:%.*]] = add nsw i32 [[T6]], 15
-; CHECK-NEXT:    [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]]
-; CHECK-NEXT:    [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]]
-; CHECK-NEXT:    [[T13:%.*]] = add nsw i32 [[T6]], 31
-; CHECK-NEXT:    [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]]
-; CHECK-NEXT:    [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]]
-; CHECK-NEXT:    [[T16:%.*]] = add nsw i32 [[T6]], 47
-; CHECK-NEXT:    [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]]
-; CHECK-NEXT:    [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 undef, i32 15, i32 31, i32 47>
+; CHECK-NEXT:    [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef
 ; CHECK-NEXT:    [[T20:%.*]] = icmp sgt i32 [[T19]], 63
-; CHECK-NEXT:    [[T21:%.*]] = sub nsw i32 undef, [[T3]]
-; CHECK-NEXT:    [[T22:%.*]] = sub i32 [[T21]], undef
-; CHECK-NEXT:    [[T23:%.*]] = sub nsw i32 undef, [[T4]]
-; CHECK-NEXT:    [[T24:%.*]] = sub i32 [[T23]], undef
-; CHECK-NEXT:    [[T25:%.*]] = add nsw i32 [[T24]], -49
-; CHECK-NEXT:    [[T30:%.*]] = add nsw i32 [[T22]], -33
-; CHECK-NEXT:    [[T35:%.*]] = add nsw i32 [[T24]], -33
-; CHECK-NEXT:    [[T40:%.*]] = add nsw i32 [[T22]], -17
+; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 -49, i32 -33, i32 -33, i32 -17>
+; CHECK-NEXT:    [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]]
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]]
-; CHECK-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]]
-; CHECK-NEXT:    [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]]
-; CHECK-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; CHECK-NEXT:    [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]]
-; CHECK-NEXT:    [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]]
-; CHECK-NEXT:    [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]]
+; CHECK-NEXT:    [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]]
 ; CHECK-NEXT:    unreachable
 ;
 bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
index d9496a3e3e343a..f7935c7af9631b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
@@ -5,11 +5,9 @@ define dso_local void @rftbsub(ptr %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: @rftbsub(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = or disjoint i64 2, 1
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[SUB22:%.*]] = fsub double undef, undef
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARRAYIDX12]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
 ; CHECK-NEXT:    [[ADD16:%.*]] = fadd double [[TMP1]], undef
 ; CHECK-NEXT:    [[MUL18:%.*]] = fmul double undef, [[ADD16]]
 ; CHECK-NEXT:    [[ADD19:%.*]] = fadd double undef, [[MUL18]]
diff --git a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
index 9f6b285f1ab90a..372202bd0cbd63 100644
--- a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll
@@ -7,9 +7,8 @@
 define i16 @D134605() {
 ; CHECK-LABEL: @D134605(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds [32 x i16], ptr poison, i16 0, i16 3
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX81]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr poison, align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
 ; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i16 poison, [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i16 [[TMP2]], 2
@@ -45,12 +44,10 @@ declare i16 @check_i16(i16, i16, i16)
 
 define void @PR58054() {
 ; CHECK-LABEL: @PR58054(
-; CHECK-NEXT:    [[VAL:%.*]] = add i64 poison, poison
-; CHECK-NEXT:    [[VAL2:%.*]] = add i64 poison, poison
-; CHECK-NEXT:    [[VAL3:%.*]] = mul i64 [[VAL2]], [[VAL]]
-; CHECK-NEXT:    [[VAL4:%.*]] = mul i64 [[VAL3]], [[VAL2]]
-; CHECK-NEXT:    [[VAL5:%.*]] = mul i64 [[VAL4]], [[VAL2]]
-; CHECK-NEXT:    [[VAL7:%.*]] = add i64 [[VAL]], [[VAL5]]
+; CHECK-NEXT:    [[VAL3:%.*]] = mul i64 poison, poison
+; CHECK-NEXT:    [[VAL4:%.*]] = mul i64 [[VAL3]], poison
+; CHECK-NEXT:    [[VAL5:%.*]] = mul i64 [[VAL4]], poison
+; CHECK-NEXT:    [[VAL7:%.*]] = add i64 poison, [[VAL5]]
 ; CHECK-NEXT:    [[VAL8:%.*]] = sitofp i64 [[VAL7]] to double
 ; CHECK-NEXT:    call void @wibble(i32 poison, double [[VAL8]], i64 poison)
 ; CHECK-NEXT:    unreachable



More information about the llvm-commits mailing list