[llvm] 36be0bb - [SCEV] Check if predicate is known false for predicated AddRecs. (#151134)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 01:30:29 PDT 2025
Author: Florian Hahn
Date: 2025-08-15T09:30:25+01:00
New Revision: 36be0bba2a8e45c48ae4336eff73abbcc77ee1f5
URL: https://github.com/llvm/llvm-project/commit/36be0bba2a8e45c48ae4336eff73abbcc77ee1f5
DIFF: https://github.com/llvm/llvm-project/commit/36be0bba2a8e45c48ae4336eff73abbcc77ee1f5.diff
LOG: [SCEV] Check if predicate is known false for predicated AddRecs. (#151134)
Similar to https://github.com/llvm/llvm-project/pull/131538, we can
also check whether a wrap predicate is known to be false given the
backedge-taken count.
For now, the check is performed directly when trying to create
predicated AddRecs. This both avoids spending compile time on
optimizations where the predicate is known to be false, and can enable
additional vectorization (e.g. by deciding to scalarize memory accesses
instead of creating a predicated AddRec whose predicate is always
false).
The initial version is quite restricted, but can be extended in
follow-ups to cover more cases.
PR: https://github.com/llvm/llvm-project/pull/151134
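For readers following the change, here is a minimal standalone sketch of the
new bail-out; the free-function form and the helper name isNSSWPredKnownFalse
are ours for illustration and are not part of the patch, but the body mirrors
the loop added to convertSCEVToAddRecWithPredicates in the diff below:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Returns true if an IncrementNSSW wrap predicate over a unit-stride AddRec
// is provably false: if Start + BackedgeTakenCount is known to be signed
// less than Start, the increment must wrap in the signed sense.
static bool isNSSWPredKnownFalse(ScalarEvolution &SE,
                                 const SCEVPredicate *Pred) {
  auto *WrapPred = dyn_cast<SCEVWrapPredicate>(Pred);
  if (!WrapPred || WrapPred->getFlags() != SCEVWrapPredicate::IncrementNSSW)
    return false; // Only the no-signed-self-wrap case is handled for now.

  const SCEVAddRecExpr *AddRec = WrapPred->getExpr();
  const SCEV *BTC = SE.getBackedgeTakenCount(AddRec->getLoop());
  if (isa<SCEVCouldNotCompute>(BTC))
    return false; // Unknown backedge-taken count; nothing can be concluded.

  const SCEV *Step = AddRec->getStepRecurrence(SE);
  if (!Step->isOne())
    return false; // The initial version is restricted to unit strides.

  // The AddRec's final value is Start + BTC; if that is provably signed
  // less than Start, the NSSW predicate can never hold at runtime.
  BTC = SE.getTruncateOrSignExtend(BTC, Step->getType());
  const SCEV *Final = SE.getAddExpr(AddRec->getStart(), BTC);
  return SE.isKnownPredicate(CmpInst::ICMP_SLT, Final, AddRec->getStart());
}

When this check fires, the patch returns nullptr instead of building a
predicated AddRec whose runtime check can never pass, which is what saves the
compile time and lets the vectorizer fall back to e.g. scalarized accesses as
described above.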
Added:
Modified:
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index c20b1bdaf94c7..ce4d4ad7a0ab0 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -14952,6 +14952,29 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
if (!AddRec)
return nullptr;
+ // Check if any of the transformed predicates is known to be false. In that
+ // case, it doesn't make sense to convert to a predicated AddRec, as the
+ // versioned loop will never execute.
+ for (const SCEVPredicate *Pred : TransformPreds) {
+ auto *WrapPred = dyn_cast<SCEVWrapPredicate>(Pred);
+ if (!WrapPred || WrapPred->getFlags() != SCEVWrapPredicate::IncrementNSSW)
+ continue;
+
+ const SCEVAddRecExpr *AddRecToCheck = WrapPred->getExpr();
+ const SCEV *ExitCount = getBackedgeTakenCount(AddRecToCheck->getLoop());
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+
+ const SCEV *Step = AddRecToCheck->getStepRecurrence(*this);
+ if (!Step->isOne())
+ continue;
+
+ ExitCount = getTruncateOrSignExtend(ExitCount, Step->getType());
+ const SCEV *Add = getAddExpr(AddRecToCheck->getStart(), ExitCount);
+ if (isKnownPredicate(CmpInst::ICMP_SLT, Add, AddRecToCheck->getStart()))
+ return nullptr;
+ }
+
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
Preds.append(TransformPreds.begin(), TransformPreds.end());
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
index 71c2da2681b3d..e6a81b6f9f6d0 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
@@ -6,16 +6,61 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
; CHECK-LABEL: define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(
; CHECK-SAME: i8 [[FOR_START:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP8]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0
+; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1
+; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2
+; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3
+; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP12]], align 1
+; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP13]], align 1
+; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP14]], align 1
+; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP15]], align 1
+; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP16]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8
+; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -7, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[FOR_START]], %[[MIDDLE_BLOCK]] ], [ [[FOR_START]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[FOR_NEXT]] = and i8 [[FOR_START]], -1
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: store i8 [[FOR]], ptr [[GEP_DST]], align 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: ret i8 [[FOR_NEXT_LCSSA]]
@@ -61,7 +106,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -82,7 +127,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
; CHECK-NEXT: store i32 0, ptr [[A_GEP]], align 4
-; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[FOR_LCSSA]]
@@ -142,7 +187,7 @@ define void @sink_dead_inst(ptr %a) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT1:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
@@ -163,7 +208,7 @@ define void @sink_dead_inst(ptr %a) {
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
; CHECK-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
-; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
@@ -205,7 +250,7 @@ define void @unused_recurrence(ptr %a) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
; CHECK-NEXT: br label %[[SCALAR_PH]]
@@ -220,7 +265,7 @@ define void @unused_recurrence(ptr %a) {
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
-; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;