[llvm] [SCEV] Check if predicate is known false for predicated AddRecs. (PR #151134)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 29 05:13:08 PDT 2025


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/151134

Similarly to https://github.com/llvm/llvm-project/pull/131538, we can also check whether a wrap predicate is known to be false, i.e. whether the increment is guaranteed to wrap given the backedge-taken count.

For now, this check is performed directly when we try to create predicated AddRecs. It both avoids spending compile time on optimizations where the predicate is known to be false, and can also enable additional vectorization (e.g. by deciding to scalarize memory accesses instead of creating a predicated AddRec whose predicate is always false).

The initial version is quite restricted, but can be extended in follow-ups to cover more cases.
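For context, the first test in the diff below is the one whose output changes. Reconstructed roughly from its CHECK lines (the input IR itself is not part of the diff, so the value names here are approximate), the scalar loop looks like this:

  define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %for.start, ptr %dst) {
  entry:
    br label %loop

  loop:
    %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
    %for = phi i8 [ %for.start, %entry ], [ %for.next, %loop ]
    %for.next = and i8 %for.start, -1
    %iv.next = add i32 %iv, 1
    %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %iv
    store i8 %for, ptr %gep.dst, align 1
    %ec = icmp eq i32 %iv.next, 0
    br i1 %ec, label %exit, label %loop

  exit:
    %for.next.lcssa = phi i8 [ %for.next, %loop ]
    ret i8 %for.next.lcssa
  }

The i32 induction variable starts at 1 and the loop only exits once %iv.next wraps around to 0, so the backedge-taken count is close to 2^32. With this change, the updated CHECK lines show the loop is now vectorized, with the address computations and stores scalarized.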

From 5c3445cd84f6a2552f69617ffe828ee159cd01f2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 19 Jul 2025 08:36:16 +0100
Subject: [PATCH] [SCEV] Check if predicate is known false for predicated
 AddRecs.

Similarly to https://github.com/llvm/llvm-project/pull/131538, we can
also check whether a wrap predicate is known to be false, i.e. whether
the increment is guaranteed to wrap given the backedge-taken count.

For now, this check is performed directly when we try to create
predicated AddRecs. It both avoids spending compile time on
optimizations where the predicate is known to be false, and can also
enable additional vectorization (e.g. by deciding to scalarize memory
accesses instead of creating a predicated AddRec whose predicate is
always false).

The initial version is quite restricted, but can be extended in
follow-ups to cover more cases.
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 71 ++++++++++++++++---
 ...irst-order-recurrence-dead-instructions.ll | 63 +++++++++++++---
 2 files changed, 114 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 0990a0daac80c..5c75c641264b4 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2300,9 +2300,15 @@ CollectAddOperandsWithScales(SmallDenseMap<const SCEV *, APInt, 16> &M,
   return Interesting;
 }
 
-bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
-                                      const SCEV *LHS, const SCEV *RHS,
-                                      const Instruction *CtxI) {
+namespace {
+enum class OverflowCheckTy { WillOverflow, WillNotOverflow };
+}
+
+// Return true if (LHS BinOp RHS) is guaranteed to satisfy \p Check.
+static bool checkOverflow(OverflowCheckTy Check, ScalarEvolution *SE,
+                          Instruction::BinaryOps BinOp, bool Signed,
+                          const SCEV *LHS, const SCEV *RHS,
+                          const Instruction *CtxI) {
   const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
                                             SCEV::NoWrapFlags, unsigned);
   switch (BinOp) {
@@ -2328,12 +2334,12 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
   auto *WideTy =
       IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
 
-  const SCEV *A = (this->*Extension)(
-      (this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
-  const SCEV *LHSB = (this->*Extension)(LHS, WideTy, 0);
-  const SCEV *RHSB = (this->*Extension)(RHS, WideTy, 0);
-  const SCEV *B = (this->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0);
-  if (A == B)
+  const SCEV *A = (SE->*Extension)(
+      (SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
+  const SCEV *LHSB = (SE->*Extension)(LHS, WideTy, 0);
+  const SCEV *RHSB = (SE->*Extension)(RHS, WideTy, 0);
+  const SCEV *B = (SE->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0);
+  if (Check == OverflowCheckTy::WillNotOverflow && A == B)
     return true;
   // Can we use context to prove the fact we need?
   if (!CtxI)
@@ -2361,21 +2367,31 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
   }
 
   ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  if (Check == OverflowCheckTy::WillOverflow)
+    Pred = CmpInst::getInversePredicate(Pred);
+
   if (OverflowDown) {
     // To avoid overflow down, we need to make sure that MIN + Magnitude <= LHS.
     APInt Min = Signed ? APInt::getSignedMinValue(NumBits)
                        : APInt::getMinValue(NumBits);
     APInt Limit = Min + Magnitude;
-    return isKnownPredicateAt(Pred, getConstant(Limit), LHS, CtxI);
+    return SE->isKnownPredicateAt(Pred, SE->getConstant(Limit), LHS, CtxI);
   } else {
     // To avoid overflow up, we need to make sure that LHS <= MAX - Magnitude.
     APInt Max = Signed ? APInt::getSignedMaxValue(NumBits)
                        : APInt::getMaxValue(NumBits);
     APInt Limit = Max - Magnitude;
-    return isKnownPredicateAt(Pred, LHS, getConstant(Limit), CtxI);
+    return SE->isKnownPredicateAt(Pred, LHS, SE->getConstant(Limit), CtxI);
   }
 }
 
+bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
+                                      const SCEV *LHS, const SCEV *RHS,
+                                      const Instruction *CtxI) {
+  return checkOverflow(OverflowCheckTy::WillNotOverflow, this, BinOp, Signed,
+                       LHS, RHS, CtxI);
+}
+
 std::optional<SCEV::NoWrapFlags>
 ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
     const OverflowingBinaryOperator *OBO) {
@@ -14930,6 +14946,39 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
   if (!AddRec)
     return nullptr;
 
+  // Check if any of the transformed predicates is known to be false. In that
+  // case, it doesn't make sense to convert to a predicated AddRec, as the
+  // versioned loop will never execute.
+  for (const SCEVPredicate *Pred : TransformPreds) {
+    auto *WrapPred = dyn_cast<SCEVWrapPredicate>(Pred);
+    if (!WrapPred ||
+        (WrapPred->getFlags() != SCEVWrapPredicate::IncrementNSSW &&
+         WrapPred->getFlags() != SCEVWrapPredicate::IncrementNUSW))
+      continue;
+
+    const SCEVAddRecExpr *AddRecToCheck = WrapPred->getExpr();
+    const SCEV *ExitCount = getBackedgeTakenCount(AddRec->getLoop());
+    if (isa<SCEVCouldNotCompute>(ExitCount))
+      continue;
+
+    const SCEV *Step = AddRecToCheck->getStepRecurrence(*this);
+    if (!Step->isOne())
+      continue;
+
+    bool CheckSigned = WrapPred->getFlags() == SCEVWrapPredicate::IncrementNSSW;
+    ExitCount = getTruncateOrSignExtend(ExitCount, Step->getType());
+    if (checkOverflow(OverflowCheckTy::WillOverflow, this, Instruction::Add,
+                      CheckSigned, AddRecToCheck->getStart(), ExitCount,
+                      nullptr)) {
+      return nullptr;
+    }
+    const SCEV *S = getAddExpr(AddRecToCheck->getStart(), ExitCount);
+    if (isKnownPredicate(CheckSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT, S,
+                         AddRecToCheck->getStart())) {
+      return nullptr;
+    }
+  }
+
   // Since the transformation was successful, we can now transfer the SCEV
   // predicates.
   Preds.append(TransformPreds.begin(), TransformPreds.end());
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
index 71c2da2681b3d..e6a81b6f9f6d0 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
@@ -6,16 +6,61 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
 ; CHECK-LABEL: define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(
 ; CHECK-SAME: i8 [[FOR_START:%.*]], ptr [[DST:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP8]]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0
+; CHECK-NEXT:    store i8 [[TMP17]], ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1
+; CHECK-NEXT:    store i8 [[TMP18]], ptr [[TMP10]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2
+; CHECK-NEXT:    store i8 [[TMP19]], ptr [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3
+; CHECK-NEXT:    store i8 [[TMP20]], ptr [[TMP12]], align 1
+; CHECK-NEXT:    store i8 [[TMP17]], ptr [[TMP13]], align 1
+; CHECK-NEXT:    store i8 [[TMP18]], ptr [[TMP14]], align 1
+; CHECK-NEXT:    store i8 [[TMP19]], ptr [[TMP15]], align 1
+; CHECK-NEXT:    store i8 [[TMP20]], ptr [[TMP16]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ -7, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[FOR_START]], %[[MIDDLE_BLOCK]] ], [ [[FOR_START]], %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[FOR:%.*]] = phi i8 [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FOR:%.*]] = phi i8 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[FOR_NEXT]] = and i8 [[FOR_START]], -1
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
 ; CHECK-NEXT:    store i8 [[FOR]], ptr [[GEP_DST]], align 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ]
 ; CHECK-NEXT:    ret i8 [[FOR_NEXT_LCSSA]]
@@ -61,7 +106,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
 ; CHECK-NEXT:    br label %[[FOR_END:.*]]
@@ -82,7 +127,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
 ; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
 ; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
 ; CHECK-NEXT:    store i32 0, ptr [[A_GEP]], align 4
-; CHECK-NEXT:    br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
 ; CHECK-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[FOR_LCSSA]]
@@ -142,7 +187,7 @@ define void @sink_dead_inst(ptr %a) {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT1:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
@@ -163,7 +208,7 @@ define void @sink_dead_inst(ptr %a) {
 ; CHECK-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
 ; CHECK-NEXT:    store i16 [[USE_REC_1]], ptr [[GEP]], align 2
-; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -205,7 +250,7 @@ define void @unused_recurrence(ptr %a) {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
@@ -220,7 +265,7 @@ define void @unused_recurrence(ptr %a) {
 ; CHECK-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
 ; CHECK-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
-; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
 ; CHECK-NEXT:    ret void
 ;


