[llvm] 8f16837 - [SLP]Support non-ordered copyable argument in non-commutative instructions
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 12 10:28:29 PDT 2025
Author: Alexey Bataev
Date: 2025-10-12T10:28:19-07:00
New Revision: 8f168376c11d45834a59da9d61b2d850f2342f32
URL: https://github.com/llvm/llvm-project/commit/8f168376c11d45834a59da9d61b2d850f2342f32
DIFF: https://github.com/llvm/llvm-project/commit/8f168376c11d45834a59da9d61b2d850f2342f32.diff
LOG: [SLP]Support non-ordered copyable argument in non-commutative instructions
If a non-commutative user has several identical operands and at least one
of them (but not the first) is copyable, this case must be taken into
account when calculating the number of dependencies. Otherwise, the
schedule bundle might not be scheduled correctly, causing a compiler
crash.
Fixes #162925
Added:
llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2388375b8c2bb..96f05fc82f296 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5291,19 +5291,19 @@ class BoUpSLP {
// data.
for (TreeEntry *TE : Entries) {
// Check if the user is commutative.
- // The commutatives are handled later, as their oeprands can be
+ // The commutatives are handled later, as their operands can be
// reordered.
// Same applies even for non-commutative cmps, because we can invert
// their predicate potentially and, thus, reorder the operands.
bool IsCommutativeUser =
::isCommutative(User) ||
::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
- EdgeInfo EI(TE, U.getOperandNo());
if (!IsCommutativeUser && !isa<CmpInst>(User)) {
unsigned &OpCnt =
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
+ EdgeInfo EI(TE, U.getOperandNo());
if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
- return false;
+ continue;
// Found copyable operand - continue.
++OpCnt;
continue;
@@ -5312,33 +5312,38 @@ class BoUpSLP {
.first->getSecond();
}
}
- // Check the commutative/cmp entries.
- if (!PotentiallyReorderedEntriesCount.empty()) {
- for (auto &P : PotentiallyReorderedEntriesCount) {
- auto *It = find(P.first->Scalars, User);
- assert(It != P.first->Scalars.end() &&
- "User is not in the tree entry");
- int Lane = std::distance(P.first->Scalars.begin(), It);
- assert(Lane >= 0 && "Lane is not found");
- if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
- Lane = P.first->ReorderIndices[Lane];
- assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
- "Couldn't find extract lane");
- SmallVector<unsigned> OpIndices;
- for (unsigned OpIdx :
- seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
- P.first->getMainOp()))) {
- if (P.first->getOperand(OpIdx)[Lane] == Op &&
- getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
- --P.getSecond();
- }
- }
- return all_of(PotentiallyReorderedEntriesCount,
+ if (PotentiallyReorderedEntriesCount.empty())
+ return all_of(OrderedEntriesCount,
[&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps - 1;
+ return P.second == NumOps;
});
- }
- return true;
+ // Check the commutative/cmp entries.
+ for (auto &P : PotentiallyReorderedEntriesCount) {
+ auto *It = find(P.first->Scalars, User);
+ assert(It != P.first->Scalars.end() && "User is not in the tree entry");
+ int Lane = std::distance(P.first->Scalars.begin(), It);
+ assert(Lane >= 0 && "Lane is not found");
+ if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
+ Lane = P.first->ReorderIndices[Lane];
+ assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
+ "Couldn't find extract lane");
+ SmallVector<unsigned> OpIndices;
+ for (unsigned OpIdx :
+ seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
+ P.first->getMainOp()))) {
+ if (P.first->getOperand(OpIdx)[Lane] == Op &&
+ getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
+ --P.getSecond();
+ }
+ }
+ return all_of(PotentiallyReorderedEntriesCount,
+ [&](const std::pair<const TreeEntry *, unsigned> &P) {
+ return P.second == NumOps - 1;
+ }) &&
+ all_of(OrderedEntriesCount,
+ [&](const std::pair<const TreeEntry *, unsigned> &P) {
+ return P.second == NumOps;
+ });
}
SmallVector<ScheduleCopyableData *>
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll
new file mode 100644
index 0000000000000..0561466b564be
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
+
+define i32 @main(ptr %q, ptr %a, i8 %.pre) {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ptr [[Q:%.*]], ptr [[A:%.*]], i8 [[DOTPRE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DOTPRE1:%.*]] = load i8, ptr [[Q]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[DOTPRE]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[DOTPRE1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 0, i32 1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> <i32 poison, i32 1>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i32> [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
+; CHECK-NEXT: store <2 x i16> [[TMP6]], ptr [[A]], align 2
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %.pre1 = load i8, ptr %q, align 1
+ %conv11.i = sext i8 %.pre to i32
+ %shl18.i = shl i32 %conv11.i, %conv11.i
+ %conv19.i = trunc i32 %shl18.i to i16
+ store i16 %conv19.i, ptr %a, align 2
+ %0 = sext i8 %.pre1 to i32
+ %1 = add i32 %0, 1
+ %shl18.i.1 = shl i32 1, %1
+ %conv19.i.1 = trunc i32 %shl18.i.1 to i16
+ %arrayidx21.i.1 = getelementptr i8, ptr %a, i64 2
+ store i16 %conv19.i.1, ptr %arrayidx21.i.1, align 2
+ ret i32 0
+}
More information about the llvm-commits
mailing list