[llvm] 688cddd - [SLP] Honor min/max regsize and min/max VF in vectorizeStores
Bjorn Pettersson via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 22 09:30:06 PDT 2021
Author: Bjorn Pettersson
Date: 2021-03-22T17:29:35+01:00
New Revision: 688cdddafb0dfdeb5f3c5e1e22b88a0cdfc54c0c
URL: https://github.com/llvm/llvm-project/commit/688cdddafb0dfdeb5f3c5e1e22b88a0cdfc54c0c
DIFF: https://github.com/llvm/llvm-project/commit/688cdddafb0dfdeb5f3c5e1e22b88a0cdfc54c0c.diff
LOG: [SLP] Honor min/max regsize and min/max VF in vectorizeStores
Make sure we use PowerOf2Floor instead of PowerOf2Ceil when
calculating the maximum number of elements that fit inside a vector
register (otherwise we could end up creating vectors larger
than the maximum vector register size).
Also make sure we honor the min/max VF (as given by TTI or
command-line parameters) when vectorizing stores in vectorizeStores.
Reviewed By: anton-afanasyev
Differential Revision: https://reviews.llvm.org/D97691
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0ec802799c22..f36d2fc3ab2a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6152,17 +6152,18 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
I = ConsecutiveChain[I];
}
- // If a vector register can't hold 1 element, we are done.
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(Operands[0]);
- if (MaxVecRegSize % EltSize != 0)
- continue;
+ unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
+
+ unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize);
+ unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
+ MaxElts);
- unsigned MaxElts = MaxVecRegSize / EltSize;
// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
unsigned StartIdx = 0;
- for (unsigned Size = llvm::PowerOf2Ceil(MaxElts); Size >= 2; Size /= 2) {
+ for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
if (!VectorizedStores.count(Slice.front()) &&
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
index 367795a82559..be03ab7813e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
@@ -15,15 +15,20 @@
define void @foo() {
; CHECK-VF8-160-LABEL: @foo(
-; CHECK-VF8-160-NEXT: store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* bitcast ([8 x i32]* @X to <8 x i32>*), align 1
+; CHECK-VF8-160-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
+; CHECK-VF8-160-NEXT: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
; CHECK-VF8-160-NEXT: ret void
;
; CHECK-VF4-160-LABEL: @foo(
-; CHECK-VF4-160-NEXT: store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* bitcast ([8 x i32]* @X to <8 x i32>*), align 1
+; CHECK-VF4-160-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
+; CHECK-VF4-160-NEXT: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
; CHECK-VF4-160-NEXT: ret void
;
; CHECK-VF2-160-LABEL: @foo(
-; CHECK-VF2-160-NEXT: store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* bitcast ([8 x i32]* @X to <8 x i32>*), align 1
+; CHECK-VF2-160-NEXT: store <2 x i32> <i32 1, i32 2>, <2 x i32>* bitcast ([8 x i32]* @X to <2 x i32>*), align 1
+; CHECK-VF2-160-NEXT: store <2 x i32> <i32 3, i32 4>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2) to <2 x i32>*), align 1
+; CHECK-VF2-160-NEXT: store <2 x i32> <i32 5, i32 6>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <2 x i32>*), align 1
+; CHECK-VF2-160-NEXT: store <2 x i32> <i32 7, i32 8>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6) to <2 x i32>*), align 1
; CHECK-VF2-160-NEXT: ret void
;
; CHECK-VF8-128-LABEL: @foo(
@@ -37,8 +42,10 @@ define void @foo() {
; CHECK-VF4-128-NEXT: ret void
;
; CHECK-VF2-128-LABEL: @foo(
-; CHECK-VF2-128-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF2-128-NEXT: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF2-128-NEXT: store <2 x i32> <i32 1, i32 2>, <2 x i32>* bitcast ([8 x i32]* @X to <2 x i32>*), align 1
+; CHECK-VF2-128-NEXT: store <2 x i32> <i32 3, i32 4>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2) to <2 x i32>*), align 1
+; CHECK-VF2-128-NEXT: store <2 x i32> <i32 5, i32 6>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <2 x i32>*), align 1
+; CHECK-VF2-128-NEXT: store <2 x i32> <i32 7, i32 8>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6) to <2 x i32>*), align 1
; CHECK-VF2-128-NEXT: ret void
;
; CHECK-VF8-256-LABEL: @foo(
@@ -46,8 +53,14 @@ define void @foo() {
; CHECK-VF8-256-NEXT: ret void
;
; CHECK-VF2-128-128-LABEL: @foo(
-; CHECK-VF2-128-128-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF2-128-128-NEXT: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF2-128-128-NEXT: store i32 1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 0), align 1
+; CHECK-VF2-128-128-NEXT: store i32 2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 1), align 1
+; CHECK-VF2-128-128-NEXT: store i32 3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2), align 1
+; CHECK-VF2-128-128-NEXT: store i32 4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 3), align 1
+; CHECK-VF2-128-128-NEXT: store i32 5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4), align 1
+; CHECK-VF2-128-128-NEXT: store i32 6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 5), align 1
+; CHECK-VF2-128-128-NEXT: store i32 7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6), align 1
+; CHECK-VF2-128-128-NEXT: store i32 8, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 7), align 1
; CHECK-VF2-128-128-NEXT: ret void
;
store i32 1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 0), align 1
More information about the llvm-commits
mailing list