[llvm] f5ee07a - [SLP]Improve instruction reordering mode detection.
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 13:01:59 PDT 2024
Author: Alexey Bataev
Date: 2024-07-08T16:01:55-04:00
New Revision: f5ee07a1b5ca6f337e6b4a46558c315b8ccd29dc
URL: https://github.com/llvm/llvm-project/commit/f5ee07a1b5ca6f337e6b4a46558c315b8ccd29dc
DIFF: https://github.com/llvm/llvm-project/commit/f5ee07a1b5ca6f337e6b4a46558c315b8ccd29dc.diff
LOG: [SLP]Improve instruction reordering mode detection.
The "instruction" reordering mode should be selected only if there are
compatible instructions in other operands, which can be reordered.
Otherwise, better to select splat reordering mode.
Metric: size..text
Program size..text
results results0 diff
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12383340.00 12383324.00 -0.0%
Some 4x operations get replaced by 8x.
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/97485
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 56fa9371fb7f9..5bcab7e929231 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2213,6 +2213,27 @@ class BoUpSLP {
return getNumLanes() == 2 || Cnt > 1;
}
+ /// Checks if there is at least single compatible operand in lanes other
+ /// than \p Lane, compatible with the operand \p Op.
+ bool canBeVectorized(Instruction *Op, unsigned OpIdx, unsigned Lane) const {
+ bool OpAPO = getData(OpIdx, Lane).APO;
+ for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ if (any_of(seq<unsigned>(getNumOperands()), [&](unsigned OpI) {
+ const OperandData &Data = getData(OpI, Ln);
+ if (Data.APO != OpAPO || Data.IsUsed)
+ return true;
+ Value *OpILn = getValue(OpI, Ln);
+ return (L && L->isLoopInvariant(OpILn)) ||
+ (getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
+ Op->getParent() == cast<Instruction>(OpILn)->getParent());
+ }))
+ return true;
+ }
+ return false;
+ }
+
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
@@ -2268,14 +2289,14 @@ class BoUpSLP {
// side.
if (isa<LoadInst>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Load;
- else if (isa<Instruction>(OpLane0)) {
+ else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
- if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+ if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
+ !canBeVectorized(OpILane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
- }
- else if (isa<Constant>(OpLane0))
+ } else if (isa<Constant>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Constant;
else if (isa<Argument>(OpLane0))
// Our best hope is a Splat. It may save some cost in some cases.
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index ff06bdc0e8446..0fcbead65d0d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -12,11 +12,11 @@ define void @foo() local_unnamed_addr {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 undef, i32 poison, i32 poison>, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 undef, i32 poison, i32 undef, i32 undef>, i32 [[ADD277]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4
; CHECK-NEXT: unreachable
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
index 156ab54dbf237..d88135df5c96a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
@@ -12,27 +12,18 @@ define void @test() {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX31]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i32 3
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 3>
+; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP14]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP11]], [[TMP14]]
; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP16]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP14]], i32 3
; CHECK-NEXT: [[MUL45:%.*]] = fmul fast float [[TMP16]], [[TMP6]]
; CHECK-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INDVARS_IV]], 31990
More information about the llvm-commits
mailing list