[llvm] e0f00bd - [LV] Don't consider second op as invariant in getDivRemSpeculationCost.

Sat Sep 6 06:06:27 PDT 2025

Author: Florian Hahn
Date: 2025-09-06T14:06:04+01:00
New Revision: e0f00bd645bfd75a6717242cc42d63eeebb1db68

URL: https://github.com/llvm/llvm-project/commit/e0f00bd645bfd75a6717242cc42d63eeebb1db68
DIFF: https://github.com/llvm/llvm-project/commit/e0f00bd645bfd75a6717242cc42d63eeebb1db68.diff

LOG: [LV] Don't consider second op as invariant in getDivRemSpeculationCost.

The second operand when using a safe divisor will always be a select in
the loop, so won't be invariant; don't treat it as such.

This fixes a divergence with legacy and VPlan based cost model.

Fixes https://github.com/llvm/llvm-project/issues/156066.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5a1d1e75e2d5d..88e477e2dc64a 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2910,19 +2910,12 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
                              toVectorTy(Type::getInt1Ty(I->getContext()), VF),
                              CmpInst::BAD_ICMP_PREDICATE, CostKind);
 
-  // Certain instructions can be cheaper to vectorize if they have a constant
-  // second vector operand. One example of this are shifts on x86.
-  Value *Op2 = I->getOperand(1);
-  auto Op2Info = TTI.getOperandInfo(Op2);
-  if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
-      Legal->isInvariant(Op2))
-    Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
-
   SmallVector<const Value *, 4> Operands(I->operand_values());
   SafeDivisorCost += TTI.getArithmeticInstrCost(
-    I->getOpcode(), VecTy, CostKind,
-    {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
-    Op2Info, Operands, I);
+      I->getOpcode(), VecTy, CostKind,
+      {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+      {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+      Operands, I);
   return {ScalarizationCost, SafeDivisorCost};
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 20a16325c3999..6c1b2568d872a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -752,6 +752,146 @@ exit:
   ret i32 %p.2
 }
 
+; Test case for https://github.com/llvm/llvm-project/issues/156066.
+define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
+; CHECK-LABEL: @sdiv_by_zero(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SDIV_CONTINUE14:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
+; CHECK:       pred.sdiv.if:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = sdiv i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE]]
+; CHECK:       pred.sdiv.continue:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_SDIV_IF]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_SDIV_IF1:%.*]], label [[PRED_SDIV_CONTINUE2:%.*]]
+; CHECK:       pred.sdiv.if1:
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = sdiv i32 [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP9]], i32 1
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE2]]
+; CHECK:       pred.sdiv.continue2:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi <8 x i32> [ [[TMP6]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_SDIV_IF1]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4:%.*]]
+; CHECK:       pred.sdiv.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT:    [[TMP14:%.*]] = sdiv i32 [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP14]], i32 2
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE4]]
+; CHECK:       pred.sdiv.continue4:
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <8 x i32> [ [[TMP11]], [[PRED_SDIV_CONTINUE2]] ], [ [[TMP15]], [[PRED_SDIV_IF3]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
+; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_SDIV_IF5:%.*]], label [[PRED_SDIV_CONTINUE6:%.*]]
+; CHECK:       pred.sdiv.if5:
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT:    [[TMP19:%.*]] = sdiv i32 [[TMP18]], 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP19]], i32 3
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE6]]
+; CHECK:       pred.sdiv.continue6:
+; CHECK-NEXT:    [[TMP21:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_SDIV_CONTINUE4]] ], [ [[TMP20]], [[PRED_SDIV_IF5]] ]
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
+; CHECK-NEXT:    br i1 [[TMP22]], label [[PRED_SDIV_IF7:%.*]], label [[PRED_SDIV_CONTINUE8:%.*]]
+; CHECK:       pred.sdiv.if7:
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 4
+; CHECK-NEXT:    [[TMP24:%.*]] = sdiv i32 [[TMP23]], 0
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP24]], i32 4
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE8]]
+; CHECK:       pred.sdiv.continue8:
+; CHECK-NEXT:    [[TMP26:%.*]] = phi <8 x i32> [ [[TMP21]], [[PRED_SDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_SDIV_IF7]] ]
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
+; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_SDIV_IF9:%.*]], label [[PRED_SDIV_CONTINUE10:%.*]]
+; CHECK:       pred.sdiv.if9:
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 5
+; CHECK-NEXT:    [[TMP29:%.*]] = sdiv i32 [[TMP28]], 0
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP29]], i32 5
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE10]]
+; CHECK:       pred.sdiv.continue10:
+; CHECK-NEXT:    [[TMP31:%.*]] = phi <8 x i32> [ [[TMP26]], [[PRED_SDIV_CONTINUE8]] ], [ [[TMP30]], [[PRED_SDIV_IF9]] ]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
+; CHECK-NEXT:    br i1 [[TMP32]], label [[PRED_SDIV_IF11:%.*]], label [[PRED_SDIV_CONTINUE12:%.*]]
+; CHECK:       pred.sdiv.if11:
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 6
+; CHECK-NEXT:    [[TMP34:%.*]] = sdiv i32 [[TMP33]], 0
+; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP34]], i32 6
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE12]]
+; CHECK:       pred.sdiv.continue12:
+; CHECK-NEXT:    [[TMP36:%.*]] = phi <8 x i32> [ [[TMP31]], [[PRED_SDIV_CONTINUE10]] ], [ [[TMP35]], [[PRED_SDIV_IF11]] ]
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
+; CHECK-NEXT:    br i1 [[TMP37]], label [[PRED_SDIV_IF13:%.*]], label [[PRED_SDIV_CONTINUE14]]
+; CHECK:       pred.sdiv.if13:
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 7
+; CHECK-NEXT:    [[TMP39:%.*]] = sdiv i32 [[TMP38]], 0
+; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <8 x i32> [[TMP36]], i32 [[TMP39]], i32 7
+; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE14]]
+; CHECK:       pred.sdiv.continue14:
+; CHECK-NEXT:    [[TMP41:%.*]] = phi <8 x i32> [ [[TMP36]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP40]], [[PRED_SDIV_IF13]] ]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP41]], <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <8 x i32> [[PREDPHI]], ptr [[TMP42]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i32 [[L]], 0
+; CHECK-NEXT:    br i1 [[ICMP]], label [[LOOP_LATCH]], label [[THEN:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[SDIV:%.*]] = sdiv i32 [[L]], 0
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ [[SDIV]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[MERGE]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp ult i64 [[IV]], 16
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %bb ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %icmp = icmp eq i32 %l, 0
+  br i1 %icmp, label %loop.latch, label %then
+
+then:
+  %sdiv = sdiv i32 %l, 0
+  br label %loop.latch
+
+loop.latch:
+  %merge = phi i32 [ %sdiv, %then ], [ 0, %loop.header ]
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
+  store i32 %merge, ptr %gep.dst, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp ult i64 %iv, 16
+  br i1 %ec, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
 attributes #0 = { "target-cpu"="znver4" }
 attributes #1 = { "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
 attributes #2 = { "target-cpu"="znver3" }