[llvm] 884d7c1 - Revert "[SLP]Check for the whole vector vectorization in unique scalars analysis"
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 11:21:57 PDT 2024
Author: Alexey Bataev
Date: 2024-09-03T11:02:07-07:00
New Revision: 884d7c137a587fdd7549bd8a26f887bbeda0cc86
URL: https://github.com/llvm/llvm-project/commit/884d7c137a587fdd7549bd8a26f887bbeda0cc86
DIFF: https://github.com/llvm/llvm-project/commit/884d7c137a587fdd7549bd8a26f887bbeda0cc86.diff
LOG: Revert "[SLP]Check for the whole vector vectorization in unique scalars analysis"
This reverts commit b74e09cb20e6218320013b54c9ba2f5c069d44b9 after
post-commit review. The number of parts is calculated incorrectly.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
llvm/test/Transforms/SLPVectorizer/RISCV/unique-loads-insert-non-power-of-2.ll
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5c37c2fdd2de3e..2bb106672c2309 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -408,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
/// total number of elements \p Size and number of registers (parts) \p
/// NumParts.
static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
- return std::min<unsigned>(Size, PowerOf2Ceil(divideCeil(Size, NumParts)));
+ return PowerOf2Ceil(divideCeil(Size, NumParts));
}
/// Returns correct remaining number of elements, considering total amount \p
@@ -7021,11 +7021,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
UniqueValues.emplace_back(V);
}
size_t NumUniqueScalarValues = UniqueValues.size();
- bool IsFullVectors =
- hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
- NumUniqueScalarValues);
- if (NumUniqueScalarValues == VL.size() &&
- (VectorizeNonPowerOf2 || IsFullVectors)) {
+ if (NumUniqueScalarValues == VL.size()) {
ReuseShuffleIndices.clear();
} else {
// FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
@@ -7036,10 +7032,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return false;
}
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
- return isa<UndefValue>(V) || !isConstant(V);
- }))) {
+ if (NumUniqueScalarValues <= 1 ||
+ (UniquePositions.size() == 1 && all_of(UniqueValues,
+ [](Value *V) {
+ return isa<UndefValue>(V) ||
+ !isConstant(V);
+ })) ||
+ !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
+ NumUniqueScalarValues)) {
if (DoNotFail && UniquePositions.size() > 1 &&
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
all_of(UniqueValues, [=](Value *V) {
@@ -9143,6 +9143,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return nullptr;
Value *VecBase = nullptr;
ArrayRef<Value *> VL = E->Scalars;
+ // If the resulting type is scalarized, do not adjust the cost.
+ if (NumParts == VL.size())
+ return nullptr;
// Check if it can be considered reused if same extractelements were
// vectorized already.
bool PrevNodeFound = any_of(
@@ -9795,7 +9798,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
InsertMask[Idx] = I + 1;
}
unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
- if (NumOfParts > 0 && NumOfParts < NumElts)
+ if (NumOfParts > 0)
VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
VecScalarsSz;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unique-loads-insert-non-power-of-2.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unique-loads-insert-non-power-of-2.ll
deleted file mode 100644
index 595293803ca859..00000000000000
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/unique-loads-insert-non-power-of-2.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v -slp-threshold=-10 < %s | FileCheck %s
-
-define void @test(ptr %agg.result) {
-; CHECK-LABEL: define void @test(
-; CHECK-SAME: ptr [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX_I39_1:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 8
-; CHECK-NEXT: [[ARRAYIDX_I39_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 16
-; CHECK-NEXT: [[ADD_PTR_I41_1_1_1:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ADD_PTR_I41_1_1_1]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr null, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr null, align 8
-; CHECK-NEXT: [[MUL_1:%.*]] = fmul double [[TMP2]], 0.000000e+00
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
-; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP5]], <2 x double> [[TMP1]], i64 0)
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], <double 0.000000e+00, double 0.000000e+00, double 1.000000e+00, double 0.000000e+00>
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP8]], i32 1
-; CHECK-NEXT: store double [[TMP9]], ptr [[ARRAYIDX_I39_1]], align 8
-; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[ARRAYIDX_I39_2]], align 8
-; CHECK-NEXT: [[ARRAYIDX_I37_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 48
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAYIDX_I37_2]], align 8
-; CHECK-NEXT: [[ARRAYIDX_I39_2_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 64
-; CHECK-NEXT: [[MUL_1_2_2:%.*]] = fmul double 1.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[MUL_2_2_2:%.*]] = fmul double 0.000000e+00, [[MUL_1_2_2]]
-; CHECK-NEXT: store double [[MUL_2_2_2]], ptr [[ARRAYIDX_I39_2_2]], align 8
-; CHECK-NEXT: ret void
-;
-entry:
- %0 = load double, ptr null, align 8
- %mul.1 = fmul double %0, 0.000000e+00
- %arrayidx.i39.1 = getelementptr i8, ptr %agg.result, i64 8
- %add.ptr.i41.1.1 = getelementptr i8, ptr null, i64 8
- %1 = load double, ptr %add.ptr.i41.1.1, align 8
- %mul.1.1 = fmul double %1, 0.000000e+00
- %mul.2.1 = fmul double 0.000000e+00, %mul.1.1
- store double %mul.2.1, ptr %arrayidx.i39.1, align 8
- %arrayidx.i39.2 = getelementptr i8, ptr %agg.result, i64 16
- %mul.1.2 = fmul double %0, 0.000000e+00
- %mul.2.2 = fmul double 0.000000e+00, %mul.1.2
- store double %mul.2.2, ptr %arrayidx.i39.2, align 8
- %arrayidx.i37.1 = getelementptr i8, ptr %agg.result, i64 24
- store double %mul.2.1, ptr %arrayidx.i37.1, align 8
- %arrayidx.i39.1.1 = getelementptr i8, ptr %agg.result, i64 32
- %add.ptr.i41.1.1.1 = getelementptr i8, ptr null, i64 16
- %2 = load double, ptr %add.ptr.i41.1.1.1, align 8
- %mul.1.1.1 = fmul double %2, 1.000000e+00
- %mul.2.1.1 = fmul double 0.000000e+00, %mul.1.1.1
- store double %mul.2.1.1, ptr %arrayidx.i39.1.1, align 8
- %arrayidx.i39.2.1 = getelementptr i8, ptr %agg.result, i64 40
- %mul.1.2.1 = fmul double %1, 0.000000e+00
- %mul.2.2.1 = fmul double 0.000000e+00, %mul.1.2.1
- store double %mul.2.2.1, ptr %arrayidx.i39.2.1, align 8
- %arrayidx.i37.2 = getelementptr i8, ptr %agg.result, i64 48
- store double %mul.2.2, ptr %arrayidx.i37.2, align 8
- %arrayidx.i39.1.2 = getelementptr i8, ptr %agg.result, i64 56
- store double %mul.2.2.1, ptr %arrayidx.i39.1.2, align 8
- %arrayidx.i39.2.2 = getelementptr i8, ptr %agg.result, i64 64
- %mul.1.2.2 = fmul double 1.000000e+00, 0.000000e+00
- %mul.2.2.2 = fmul double 0.000000e+00, %mul.1.2.2
- store double %mul.2.2.2, ptr %arrayidx.i39.2.2, align 8
- ret void
-}
More information about the llvm-commits mailing list