[llvm] [SLP]Enable Sub as a base instruction in copyables (PR #163231)

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 17 12:09:09 PDT 2025


https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/163231

>From d405138abe8d394ebcba7f438283a971462451cc Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Mon, 13 Oct 2025 10:47:58 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 24 ++++++++++++-------
 .../X86/minbw-node-used-twice.ll              | 11 ++-------
 .../X86/parent-node-non-schedulable.ll        |  4 ++--
 .../X86/vect_copyable_in_binops.ll            |  2 +-
 4 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f95d28813fa23..be14567948c22 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10657,10 +10657,11 @@ class InstructionsCompatibilityAnalysis {
   /// Checks if the opcode is supported as the main opcode for copyable
   /// elements.
   static bool isSupportedOpcode(const unsigned Opcode) {
-    return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
-           Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
-           Opcode == Instruction::UDiv || Opcode == Instruction::And ||
-           Opcode == Instruction::Or || Opcode == Instruction::Xor;
+    return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
+           Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
+           Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
+           Opcode == Instruction::And || Opcode == Instruction::Or ||
+           Opcode == Instruction::Xor;
   }
 
   /// Identifies the best candidate value, which represents main opcode
@@ -10678,7 +10679,7 @@ class InstructionsCompatibilityAnalysis {
     };
     // Exclude operands instructions immediately to improve compile time, it
     // will be unable to schedule anyway.
-    SmallDenseSet<Value *, 8> Operands;
+    SmallDenseMap<unsigned, SmallDenseSet<Value *, 8>> Operands;
     SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
     bool AnyUndef = false;
     for (Value *V : VL) {
@@ -10692,12 +10693,12 @@ class InstructionsCompatibilityAnalysis {
       if (Candidates.empty()) {
         Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
-        Operands.insert(I->op_begin(), I->op_end());
+        Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
         continue;
       }
       if (Parent == I->getParent()) {
         Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
-        Operands.insert(I->op_begin(), I->op_end());
+        Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
         continue;
       }
       auto *NodeA = DT.getNode(Parent);
@@ -10712,7 +10713,7 @@ class InstructionsCompatibilityAnalysis {
         Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
         Operands.clear();
-        Operands.insert(I->op_begin(), I->op_end());
+        Operands[I->getOpcode()].insert(I->op_begin(), I->op_end());
       }
     }
     unsigned BestOpcodeNum = 0;
@@ -10720,8 +10721,12 @@ class InstructionsCompatibilityAnalysis {
     for (const auto &P : Candidates) {
       if (P.second.size() < BestOpcodeNum)
         continue;
+      const auto &Ops = Operands.at(P.first);
+      // If have inner dependencies - skip.
+      if (any_of(P.second, [&](Instruction *I) { return Ops.contains(I); }))
+        continue;
       for (Instruction *I : P.second) {
-        if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
+        if (IsSupportedInstruction(I, AnyUndef)) {
           MainOp = I;
           BestOpcodeNum = P.second.size();
           break;
@@ -10981,6 +10986,7 @@ class InstructionsCompatibilityAnalysis {
           getWidenedType(S.getMainOp()->getType(), VL.size());
       switch (MainOpcode) {
       case Instruction::Add:
+      case Instruction::Sub:
       case Instruction::LShr:
       case Instruction::Shl:
       case Instruction::SDiv:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
index 55f2b238c07df..24899900ebb3a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
@@ -4,15 +4,8 @@
 define i8 @test() {
 ; CHECK-LABEL: define i8 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i16> [[TMP3]], [[TMP0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP4]], [[TMP0]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT:    [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <2 x i16> <i16 -1, i16 0>, <i16 -1, i16 0>
+; CHECK-NEXT:    [[CONV13_I89_PEEL_I:%.*]] = zext i1 false to i8
 ; CHECK-NEXT:    ret i8 [[CONV13_I89_PEEL_I]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
index 7c8cb02f28c63..60e13d0b4cb6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
@@ -6,12 +6,12 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) {
 ; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
+; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr null, align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3
 ; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr null, align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <6 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <6 x i64> poison, i64 [[TMP14]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 3e0a3741d6bbc..2a0e7889f0f34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -183,7 +183,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @addsub1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 1, i32 0, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> [[TMP0]], <i32 1, i32 -1, i32 0, i32 -3>
 ; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list