[llvm] d1a7225 - [SLP]Check if the node must keep its original bitwidth
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 08:04:17 PST 2024
Author: Alexey Bataev
Date: 2024-12-16T08:01:22-08:00
New Revision: d1a7225076218ce224cd29c74259b715b393dc9d
URL: https://github.com/llvm/llvm-project/commit/d1a7225076218ce224cd29c74259b715b393dc9d
DIFF: https://github.com/llvm/llvm-project/commit/d1a7225076218ce224cd29c74259b715b393dc9d.diff
LOG: [SLP]Check if the node must keep its original bitwidth
Check whether a previous analysis already determined that the node must keep
its original bitwidth, and do not demote such a node, to avoid incorrect codegen.
Fixes #120076
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3bd983ee6e125d..d967813075bb9f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2931,13 +2931,11 @@ class BoUpSLP {
/// truncation. We collect the entries that will be demoted in ToDemote.
/// \param E Node for analysis
/// \param ToDemote indices of the nodes to be demoted.
- bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
- unsigned &BitWidth,
- SmallVectorImpl<unsigned> &ToDemote,
- DenseSet<const TreeEntry *> &Visited,
- unsigned &MaxDepthLevel,
- bool &IsProfitableToDemote,
- bool IsTruncRoot) const;
+ bool collectValuesToDemote(
+ const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
+ SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
+ const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
+ bool &IsProfitableToDemote, bool IsTruncRoot) const;
/// Check if the operands on the edges \p Edges of the \p UserTE allows
/// reordering (i.e. the operands can be reordered because they have only one
@@ -17515,8 +17513,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
bool BoUpSLP::collectValuesToDemote(
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
- unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
- bool IsTruncRoot) const {
+ const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
+ bool &IsProfitableToDemote, bool IsTruncRoot) const {
// We can always demote constants.
if (all_of(E.Scalars, IsaPred<Constant>))
return true;
@@ -17528,6 +17526,10 @@ bool BoUpSLP::collectValuesToDemote(
return true;
}
+ // Check if the node was analyzed already and must keep its original bitwidth.
+ if (NodesToKeepBWs.contains(E.Idx))
+ return false;
+
// If the value is not a vectorized instruction in the expression and not used
// by the insertelement instruction and not used in multiple vector nodes, it
// cannot be demoted.
@@ -17623,8 +17625,8 @@ bool BoUpSLP::collectValuesToDemote(
for (const TreeEntry *Op : Operands) {
unsigned Level = InitLevel;
if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
- ToDemote, Visited, Level, IsProfitableToDemote,
- IsTruncRoot)) {
+ ToDemote, Visited, NodesToKeepBWs, Level,
+ IsProfitableToDemote, IsTruncRoot)) {
if (!IsProfitableToDemote)
return false;
NeedToExit = true;
@@ -17926,6 +17928,7 @@ void BoUpSLP::computeMinimumValueSizes() {
bool IsTruncRoot = false;
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
SmallVector<unsigned> RootDemotes;
+ SmallDenseSet<unsigned, 8> NodesToKeepBWs;
if (NodeIdx != 0 &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
@@ -17949,6 +17952,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// Check if the root is trunc and the next node is gather/buildvector, then
// keep trunc in scalars, which is free in most cases.
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
+ !NodesToKeepBWs.contains(E.Idx) &&
E.Idx > (IsStoreOrInsertElt ? 2u : 1u) &&
all_of(E.Scalars, [&](Value *V) {
return V->hasOneUse() || isa<Constant>(V) ||
@@ -18071,8 +18075,8 @@ void BoUpSLP::computeMinimumValueSizes() {
bool NeedToDemote = IsProfitableToDemote;
if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
- ToDemote, Visited, MaxDepthLevel, NeedToDemote,
- IsTruncRoot) ||
+ ToDemote, Visited, NodesToKeepBWs, MaxDepthLevel,
+ NeedToDemote, IsTruncRoot) ||
(MaxDepthLevel <= Limit &&
!(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
(!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
@@ -18206,7 +18210,7 @@ void BoUpSLP::computeMinimumValueSizes() {
});
}
- // If the maximum bit width we compute is less than the with of the roots'
+ // If the maximum bit width we compute is less than the width of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth == 0 ||
MaxBitWidth >=
@@ -18214,6 +18218,7 @@ void BoUpSLP::computeMinimumValueSizes() {
->getBitWidth()) {
if (UserIgnoreList)
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
+ NodesToKeepBWs.insert(ToDemote.begin(), ToDemote.end());
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
index afc38bdf00c432..d3d7f21ee1003c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
@@ -6,10 +6,11 @@ define i8 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]]
More information about the llvm-commits
mailing list