[llvm] [SLP]Enable Sub as a base instruction in copyables (PR #163231)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 12:09:09 PDT 2025
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/163231
>From d405138abe8d394ebcba7f438283a971462451cc Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Mon, 13 Oct 2025 10:47:58 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 24 ++++++++++++-------
.../X86/minbw-node-used-twice.ll | 11 ++-------
.../X86/parent-node-non-schedulable.ll | 4 ++--
.../X86/vect_copyable_in_binops.ll | 2 +-
4 files changed, 20 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f95d28813fa23..be14567948c22 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10657,10 +10657,11 @@ class InstructionsCompatibilityAnalysis {
/// Checks if the opcode is supported as the main opcode for copyable
/// elements.
static bool isSupportedOpcode(const unsigned Opcode) {
- return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
- Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
- Opcode == Instruction::UDiv || Opcode == Instruction::And ||
- Opcode == Instruction::Or || Opcode == Instruction::Xor;
+ return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
+ Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
+ Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
+ Opcode == Instruction::And || Opcode == Instruction::Or ||
+ Opcode == Instruction::Xor;
}
/// Identifies the best candidate value, which represents main opcode
@@ -10678,7 +10679,7 @@ class InstructionsCompatibilityAnalysis {
};
// Exclude operands instructions immediately to improve compile time, it
// will be unable to schedule anyway.
- SmallDenseSet<Value *, 8> Operands;
+ SmallDenseMap<unsigned, SmallDenseSet<Value *, 8>> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
bool AnyUndef = false;
for (Value *V : VL) {
@@ -10692,12 +10693,12 @@ class InstructionsCompatibilityAnalysis {
if (Candidates.empty()) {
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
Parent = I->getParent();
- Operands.insert(I->op_begin(), I->op_end());
+ Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
continue;
}
if (Parent == I->getParent()) {
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
- Operands.insert(I->op_begin(), I->op_end());
+ Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
continue;
}
auto *NodeA = DT.getNode(Parent);
@@ -10712,7 +10713,7 @@ class InstructionsCompatibilityAnalysis {
Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
Parent = I->getParent();
Operands.clear();
- Operands.insert(I->op_begin(), I->op_end());
+ Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
}
}
unsigned BestOpcodeNum = 0;
@@ -10720,8 +10721,12 @@ class InstructionsCompatibilityAnalysis {
for (const auto &P : Candidates) {
if (P.second.size() < BestOpcodeNum)
continue;
+ const auto &Ops = Operands.at(P.first);
+ // Skip this opcode if any candidate is an operand of a same-opcode candidate.
+ if (any_of(P.second, [&](Instruction *I) { return Ops.contains(I); }))
+ continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;
@@ -10981,6 +10986,7 @@ class InstructionsCompatibilityAnalysis {
getWidenedType(S.getMainOp()->getType(), VL.size());
switch (MainOpcode) {
case Instruction::Add:
+ case Instruction::Sub:
case Instruction::LShr:
case Instruction::Shl:
case Instruction::SDiv:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
index 55f2b238c07df..24899900ebb3a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
@@ -4,15 +4,8 @@
define i8 @test() {
; CHECK-LABEL: define i8 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i16> [[TMP3]], [[TMP0]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP4]], [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i16> <i16 -1, i16 0>, <i16 -1, i16 0>
+; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 false to i8
; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
index 7c8cb02f28c63..60e13d0b4cb6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
@@ -6,12 +6,12 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) {
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
+; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4
; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <6 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 2, i32 2>
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <6 x i64> poison, i64 [[TMP14]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 3e0a3741d6bbc..2a0e7889f0f34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -183,7 +183,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 1, i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> [[TMP0]], <i32 1, i32 -1, i32 0, i32 -3>
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
More information about the llvm-commits mailing list