[llvm] 98bb354 - [SLP]Fix PR107037: correctly track original/modified after vectorizations reduced values
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 15:51:52 PDT 2024
Author: Alexey Bataev
Date: 2024-09-03T15:49:19-07:00
New Revision: 98bb354a0add4aeb614430f48a23f87992166239
URL: https://github.com/llvm/llvm-project/commit/98bb354a0add4aeb614430f48a23f87992166239
DIFF: https://github.com/llvm/llvm-project/commit/98bb354a0add4aeb614430f48a23f87992166239.diff
LOG: [SLP]Fix PR107037: correctly track original/modified after vectorizations reduced values
Reduced values with multiple uses must be tracked correctly within the same
reduction emission attempt. Otherwise, the number of reuses might be
calculated incorrectly, which may cause a compiler crash.
Fixes https://github.com/llvm/llvm-project/issues/107037
Added:
llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cf802034cd56a3..5ac2e0c5586bbe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17778,10 +17778,12 @@ class HorizontalReduction {
// Emit code for constant values.
if (Candidates.size() > 1 && allConstant(Candidates)) {
Value *Res = Candidates.front();
- ++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
+ Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
for (Value *VC : ArrayRef(Candidates).drop_front()) {
Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
- ++VectorizedVals.try_emplace(VC, 0).first->getSecond();
+ Value *OrigV = TrackedToOrig.find(VC)->second;
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
if (auto *ResI = dyn_cast<Instruction>(Res))
V.analyzedReductionRoot(ResI);
}
@@ -17802,8 +17804,10 @@ class HorizontalReduction {
// Gather same values.
MapVector<Value *, unsigned> SameValuesCounter;
if (IsSupportedHorRdxIdentityOp)
- for (Value *V : Candidates)
- ++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
+ for (Value *V : Candidates) {
+ Value *OrigV = TrackedToOrig.find(V)->second;
+ ++SameValuesCounter.try_emplace(OrigV).first->second;
+ }
// Used to check if the reduced values used same number of times. In this
// case the compiler may produce better code. E.g. if reduced values are
// aabbccdd (8 x values), then the first node of the tree will have a node
@@ -17827,12 +17831,12 @@ class HorizontalReduction {
});
Candidates.resize(SameValuesCounter.size());
transform(SameValuesCounter, Candidates.begin(),
- [](const auto &P) { return P.first; });
+ [&](const auto &P) { return TrackedVals.at(P.first); });
NumReducedVals = Candidates.size();
// Have a reduction of the same element.
if (NumReducedVals == 1) {
Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
- unsigned Cnt = SameValuesCounter.lookup(OrigV);
+ unsigned Cnt = SameValuesCounter.find(OrigV)->second;
Value *RedVal =
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
@@ -17937,7 +17941,7 @@ class HorizontalReduction {
continue;
Value *V = Candidates[Cnt];
Value *OrigV = TrackedToOrig.find(V)->second;
- ++SameValuesCounter[OrigV];
+ ++SameValuesCounter.find(OrigV)->second;
}
}
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
@@ -17956,8 +17960,8 @@ class HorizontalReduction {
continue;
}
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
- unsigned NumOps =
- VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
+ unsigned NumOps = VectorizedVals.lookup(OrigV) +
+ SameValuesCounter.find(OrigV)->second;
if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
LocalExternallyUsedValues[RdxVal];
}
@@ -18085,10 +18089,11 @@ class HorizontalReduction {
for (Value *RdxVal : VL) {
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
if (IsSupportedHorRdxIdentityOp) {
- VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
+ VectorizedVals.try_emplace(OrigV,
+ SameValuesCounter.find(OrigV)->second);
continue;
}
- ++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
if (!V.isVectorized(RdxVal))
RequiredExtract.insert(RdxVal);
}
@@ -18099,10 +18104,10 @@ class HorizontalReduction {
}
if (OptReusedScalars && !AnyVectorized) {
for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
- Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
+ Value *RdxVal = TrackedVals.find(P.first)->second;
+ Value *RedVal = emitScaleForReusedOps(RdxVal, Builder, P.second);
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
- Value *OrigV = TrackedToOrig.find(P.first)->second;
- VectorizedVals.try_emplace(OrigV, P.second);
+ VectorizedVals.try_emplace(P.first, P.second);
}
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
new file mode 100644
index 00000000000000..e012cc60960b3c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i8 @test() {
+; CHECK-LABEL: define i8 @test() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> zeroinitializer)
+; CHECK-NEXT: [[OP_RDX:%.*]] = or i8 [[TMP4]], [[TMP0]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = or i8 [[OP_RDX]], [[TMP2]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = or i8 [[OP_RDX1]], [[TMP0]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = or i8 [[OP_RDX2]], [[TMP1]]
+; CHECK-NEXT: [[OP_RDX4:%.*]] = or i8 [[OP_RDX3]], [[TMP3]]
+; CHECK-NEXT: ret i8 [[OP_RDX4]]
+;
+entry:
+ %0 = trunc i32 0 to i8
+ %1 = add i8 %0, 0
+ %2 = add i8 %0, 0
+ %3 = add i8 %0, 0
+ %4 = add i8 %0, 0
+ %5 = trunc i32 0 to i8
+ %6 = or i8 %5, %0
+ %7 = or i8 %6, %2
+ %8 = or i8 %7, %3
+ %9 = or i8 %8, %0
+ %10 = or i8 %9, %4
+ %conv4 = or i8 %10, %1
+ %11 = trunc i32 0 to i8
+ %12 = add i8 %11, 0
+ %conv7 = or i8 %conv4, %12
+ %13 = add i8 %11, 0
+ %14 = add i8 %11, 0
+ %15 = add i8 %11, 0
+ %16 = trunc i32 0 to i8
+ %17 = or i8 %13, %16
+ %18 = or i8 %17, %14
+ %19 = or i8 %18, %11
+ %20 = or i8 %19, %15
+ %conv5 = or i8 %20, %conv7
+ %21 = trunc i32 0 to i8
+ %conv6 = or i8 %21, %conv5
+ ret i8 %conv6
+}
More information about the llvm-commits
mailing list