[llvm] 7f03b22 - [SLP]Enable SDiv/UDiv support as main op in copyables (#161892)

Wed Oct 8 04:28:10 PDT 2025

Author: Alexey Bataev
Date: 2025-10-08T07:28:06-04:00
New Revision: 7f03b22dce1f4dae9db3ec8919db7dbd98202e0b

URL: https://github.com/llvm/llvm-project/commit/7f03b22dce1f4dae9db3ec8919db7dbd98202e0b
DIFF: https://github.com/llvm/llvm-project/commit/7f03b22dce1f4dae9db3ec8919db7dbd98202e0b.diff

LOG: [SLP]Enable SDiv/UDiv support as main op in copyables (#161892)

Allow SDiv/UDiv as a main operation in copyables support

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 91c3d4270b241..cfa8d2703592e 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10621,7 +10621,8 @@ class InstructionsCompatibilityAnalysis {
   /// elements.
   static bool isSupportedOpcode(const unsigned Opcode) {
     return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
-           Opcode == Instruction::Shl;
+           Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
+           Opcode == Instruction::UDiv;
   }
 
   /// Identifies the best candidate value, which represents main opcode
@@ -10939,6 +10940,8 @@ class InstructionsCompatibilityAnalysis {
       case Instruction::Add:
       case Instruction::LShr:
       case Instruction::Shl:
+      case Instruction::SDiv:
+      case Instruction::UDiv:
         VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
         break;
       default:
@@ -22066,8 +22069,10 @@ bool BoUpSLP::collectValuesToDemote(
     auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
       assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
       return all_of(E.Scalars, [&](Value *V) {
-        auto *I = cast<Instruction>(V);
         APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+        if (E.hasCopyableElements() && E.isCopyableElement(V))
+          return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL));
+        auto *I = cast<Instruction>(V);
         return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
                MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
       });

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index cf62fd5cf66f7..a888027479817 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,21 +4,14 @@
 define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_add_sdiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
-; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
-; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -58,21 +51,14 @@ entry:
 define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_add_udiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
-; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT:    [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
 ; CHECK-NEXT:    ret void