[llvm] [SLP] Improve instruction reordering mode detection. (PR #97485)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 07:33:03 PDT 2024
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/97485
>From 42b649e45e0f653066114ef9d240a9a5da958f84 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 2 Jul 2024 21:39:50 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 26 +++++++++++++++++--
.../SLPVectorizer/SystemZ/pr34619.ll | 10 +++----
.../X86/reordering-single-phi.ll | 17 +++---------
3 files changed, 33 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 868e9e2687f57..1d92cc10bb092 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2184,6 +2184,27 @@ class BoUpSLP {
return getNumLanes() == 2 || Cnt > 1;
}
+ /// Checks if there is at least one operand in lanes other than \p Lane
+ /// that is compatible with the operand \p Op.
+ bool canBeVectorized(Instruction *Op, unsigned OpIdx, unsigned Lane) const {
+ bool OpAPO = getData(OpIdx, Lane).APO;
+ for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ if (any_of(seq<unsigned>(getNumOperands()), [&](unsigned OpI) {
+ const OperandData &Data = getData(OpI, Ln);
+ if (Data.APO != OpAPO || Data.IsUsed)
+ return true;
+ Value *OpILn = getValue(OpI, Ln);
+ return (L && L->isLoopInvariant(OpILn)) ||
+ (getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
+ Op->getParent() == cast<Instruction>(OpILn)->getParent());
+ }))
+ return true;
+ }
+ return false;
+ }
+
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
@@ -2239,9 +2260,10 @@ class BoUpSLP {
// side.
if (isa<LoadInst>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Load;
- else if (isa<Instruction>(OpLane0)) {
+ else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
- if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+ if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
+ !canBeVectorized(OpILane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index ff06bdc0e8446..0fcbead65d0d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -12,11 +12,11 @@ define void @foo() local_unnamed_addr {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 undef, i32 poison, i32 poison>, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 undef, i32 poison, i32 undef, i32 undef>, i32 [[ADD277]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4
; CHECK-NEXT: unreachable
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
index 156ab54dbf237..d88135df5c96a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
@@ -12,27 +12,18 @@ define void @test() {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX31]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i32 3
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 3>
+; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP14]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP11]], [[TMP14]]
; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP16]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP14]], i32 3
; CHECK-NEXT: [[MUL45:%.*]] = fmul fast float [[TMP16]], [[TMP6]]
; CHECK-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INDVARS_IV]], 31990
More information about the llvm-commits
mailing list