[llvm] [SCEV] Fold zext(C+A)<nsw> -> (sext(C) + zext(A))<nsw> if possible. (PR #142599)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 14:06:24 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/142599
>From 5869edc89c907f4131b1fcbe9d039878e06398a8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 3 Jun 2025 10:38:19 +0100
Subject: [PATCH 1/3] [SCEV] Fold zext(C+A)<nsw> -> (sext(C) + zext(A))<nsw> if
possible.
Simplify zext(C+A)<nsw> -> (sext(C) + zext(A))<nsw> if
* zext (C + A)<nsw> >=s 0 and
* A >=s V.
For now this is limited to cases where the first operand is a constant,
so the SExt can be folded to a new constant. This can be relaxed in the
future.
Alive2 proof of the general pattern and the test changes in zext-nuw.ll
(times out in the online instance but verifies locally)
https://alive2.llvm.org/ce/z/_BtyGy
---
llvm/lib/Analysis/ScalarEvolution.cpp | 12 +++++++++++
.../max-backedge-taken-count-guard-info.ll | 2 +-
.../Transforms/IndVarSimplify/zext-nuw.ll | 6 ++----
.../LoopIdiom/X86/memset-size-compute.ll | 8 ++++----
.../Transforms/LoopVectorize/reduction.ll | 20 +++++++++----------
5 files changed, 29 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 2dfe625eb0dcc..03fda7e2064ae 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1793,6 +1793,18 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
+ const SCEVConstant *C;
+ const SCEV *A;
+ // zext (C + A)<nsw> -> (sext(C) + zext(A))<nsw> if zext (C + A)<nsw> >=s 0
+ // and A >=s V.
+ if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) &&
+ match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+ isKnownPredicate(CmpInst::ICMP_SGE, A, C)) {
+ SmallVector<const SCEV *, 4> Ops = {getSignExtendExpr(C, Ty, Depth + 1),
+ getZeroExtendExpr(A, Ty, Depth + 1)};
+ return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
+ }
+
// zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
// if D + (C - D + x + y + ...) could be proven to not unsigned wrap
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 9bf2427eddb9c..1a04b0c72cf2c 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) {
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,3) S: [0,3) Exits: (-1 + %N)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv
-; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N)<nsw> to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: (-2 + (2 * (zext i32 %N to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index d24f9a4e40e38..17921afc5ff06 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -15,11 +15,9 @@ define void @_Z3fn1v() {
; CHECK-NEXT: [[J_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[X5]], align 1
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
; CHECK: .preheader4.lr.ph:
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[X4]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]]
; CHECK: .preheader4:
; CHECK-NEXT: [[K_09:%.*]] = phi ptr [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ]
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index ea2cfe74be264..feef268bc7412 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -15,11 +15,11 @@ define void @test(ptr %ptr) {
; CHECK: for.body.preheader:
; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LIM_0]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 2)
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], -8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index 757be041afbb5..af6aa9373b3cb 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1199,13 +1199,13 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
+; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -1222,7 +1222,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
@@ -1277,13 +1277,13 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
+; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -1300,7 +1300,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
>From b45d22307a271b36879c71656e472541aada602a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 3 Jun 2025 16:37:46 +0100
Subject: [PATCH 2/3] !fixup drop A >= C requierement
---
llvm/lib/Analysis/ScalarEvolution.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 03fda7e2064ae..dc334b5092a35 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1795,13 +1795,11 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
const SCEVConstant *C;
const SCEV *A;
- // zext (C + A)<nsw> -> (sext(C) + zext(A))<nsw> if zext (C + A)<nsw> >=s 0
- // and A >=s V.
+ // zext (C + A)<nsw> -> (sext(C) + sext(A))<nsw> if zext (C + A)<nsw> >=s 0.
if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) &&
- match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
- isKnownPredicate(CmpInst::ICMP_SGE, A, C)) {
+ match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A)))) {
SmallVector<const SCEV *, 4> Ops = {getSignExtendExpr(C, Ty, Depth + 1),
- getZeroExtendExpr(A, Ty, Depth + 1)};
+ getSignExtendExpr(A, Ty, Depth + 1)};
return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
>From 7f8f9370d9849921580d19c09ec1ff91b1351b65 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 3 Jun 2025 19:58:47 +0100
Subject: [PATCH 3/3] !fixup add test showing regression.
---
.../Transforms/IndVarSimplify/add-nsw-zext-fold.ll | 13 +++++++------
llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll | 7 ++++---
2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll
index 1de41e47a8569..c263b3f2d060b 100644
--- a/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll
@@ -12,14 +12,15 @@ define void @add_nsw_zext_fold_results_in_sext(i64 %len) {
; CHECK-NEXT: [[LEN_TRUNC:%.*]] = trunc i64 [[LEN]] to i32
; CHECK-NEXT: [[LZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[LEN_TRUNC]], i1 false)
; CHECK-NEXT: [[SUB_I:%.*]] = lshr i32 [[LZ]], 3
-; CHECK-NEXT: [[ADD_I:%.*]] = sub i32 5, [[SUB_I]]
; CHECK-NEXT: [[PRECOND:%.*]] = icmp eq i32 [[SUB_I]], 5
; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
; CHECK: [[LOOP_PREHEADER]]:
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[ADD_I]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[SUB_I]]
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 5
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], %[[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[SH_PROM:%.*]] = zext nneg i32 [[IV_NEXT]] to i64
@@ -65,9 +66,9 @@ define void @add_nsw_zext_fold_results_in_sext_known_positive(i32 %mask, ptr %sr
; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 [[ADD]], 0
; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[PH:.*]]
; CHECK: [[PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 78, [[SPEC_SELECT]]
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sub i32 0, [[SPEC_SELECT]]
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 79
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP2]]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index df32e60d5065a..2efb72a017899 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -9,9 +9,10 @@ define void @fold_add_zext_to_sext(ptr %dst, i1 %start) {
; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[START_EXT]]
+; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP5]], 100
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
More information about the llvm-commits
mailing list