[llvm] [polly] [SCEV] Teach SCEVExpander to use zext nneg when possible (PR #70815)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 09:32:45 PDT 2023
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/70815
>From 1eb9cfaee12ab363fc566200bbcfec380366e2c1 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 31 Oct 2023 08:00:38 -0700
Subject: [PATCH 1/2] [SCEV] Teach SCEVExpander to use zext nneg when possible
zext nneg was recently added to the IR in #67982. Teaching SCEVExpander
to emit nneg when possible is valuable since SCEV may have proved
non-trivial facts about loop bounds which would otherwise be lost when
materializing the value.
---
.../Transforms/Utils/ScalarEvolutionExpander.cpp | 5 ++++-
.../Transforms/IRCE/iv-plus-offset-range-check.ll | 2 +-
llvm/test/Transforms/IRCE/wide_indvar.ll | 14 +++++++-------
.../promote-iv-to-eliminate-casts.ll | 4 ++--
llvm/test/Transforms/IndVarSimplify/zext-nuw.ll | 2 +-
.../LoopIdiom/X86/memset-size-compute.ll | 2 +-
llvm/test/Transforms/LoopVectorize/reduction.ll | 4 ++--
7 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5c976d574854283..c8cd9c4ee13e65c 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1293,7 +1293,10 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
Value *V = expand(S->getOperand());
- return Builder.CreateZExt(V, S->getType());
+ auto *Res = Builder.CreateZExt(V, S->getType());
+ if (auto *I = dyn_cast<Instruction>(Res))
+ I->setNonNeg(SE.isKnownNonNegative(S->getOperand()));
+ return Res;
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
diff --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
index bda7cf82eff97a8..9c3713bdd5db0a8 100644
--- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
+++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
@@ -1088,7 +1088,7 @@ define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) {
; CHECK-NEXT: [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1)
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[N]] to i16
+; CHECK-NEXT: [[TMP6:%.*]] = zext nneg i8 [[N]] to i16
; CHECK-NEXT: [[TMP7:%.*]] = sub i16 124, [[TMP6]]
; CHECK-NEXT: [[SMIN3:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP7]], i16 0)
; CHECK-NEXT: [[TMP8:%.*]] = trunc i16 [[SMIN3]] to i8
diff --git a/llvm/test/Transforms/IRCE/wide_indvar.ll b/llvm/test/Transforms/IRCE/wide_indvar.ll
index 415ed5750dcc496..10eea088af4f216 100644
--- a/llvm/test/Transforms/IRCE/wide_indvar.ll
+++ b/llvm/test/Transforms/IRCE/wide_indvar.ll
@@ -125,7 +125,7 @@ define i32 @test_increasing_slt_slt_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6:![0-9]+]]
; CHECK-NEXT: [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7:![0-9]+]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smin.i64(i64 [[M]], i64 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -217,7 +217,7 @@ define i32 @test_increasing_slt_slt_wide_general(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[SMAX1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 0)
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -309,7 +309,7 @@ define i32 @test_increasing_slt_slt_wide_general_preloop(ptr %n_ptr, ptr %m_ptr)
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[M]], [[SMAX1]]
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[SMAX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 -1, [[EXIT_PRELOOP_AT]]
@@ -456,7 +456,7 @@ define i32 @test_increasing_slt_slt_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
; CHECK-NEXT: [[TMP14:%.*]] = add nsw i64 [[SMAX12]], 1
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[SMIN13:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN9]], i64 [[TMP15]])
-; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN14:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN13]], i64 [[TMP16]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN14]], i64 0)
; CHECK-NEXT: [[TMP17:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -719,7 +719,7 @@ define i32 @test_increasing_ult_ult_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6]]
; CHECK-NEXT: [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[M]], i64 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -809,7 +809,7 @@ define i32 @test_increasing_ult_ult_wide_general(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -918,7 +918,7 @@ define i32 @test_increasing_ult_ult_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i64 [[SMAX7]], 1
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[UMIN8:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN5]], i64 [[TMP11]])
-; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN8]], i64 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP13]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 8819df6d013feed..d912540c95657f3 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -230,7 +230,7 @@ define void @promote_latch_condition_decrementing_loop_02(ptr %p, ptr %a) {
; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loopexit.loopexit:
; CHECK-NEXT: br label [[LOOPEXIT]]
@@ -273,7 +273,7 @@ define void @promote_latch_condition_decrementing_loop_03(ptr %p, ptr %a) {
; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loopexit.loopexit:
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 01eaa46d981e45b..d24f9a4e40e3895 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -16,7 +16,7 @@ define void @_Z3fn1v() {
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
; CHECK: .preheader4.lr.ph:
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index 7aa7e6e69bcb780..ea2cfe74be2649b 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -17,7 +17,7 @@ define void @test(ptr %ptr) {
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index aba2532706de5c6..25352ee0991bade 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1200,7 +1200,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -1278,7 +1278,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
>From b3daadcb50cc1ae0111c9a4cf08c296f3a23e31a Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 31 Oct 2023 09:32:26 -0700
Subject: [PATCH 2/2] Fix polly build failure
---
polly/test/CodeGen/partial_write_in_region_with_loop.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/polly/test/CodeGen/partial_write_in_region_with_loop.ll b/polly/test/CodeGen/partial_write_in_region_with_loop.ll
index 5af09852944b143..48a9dbef21d106a 100644
--- a/polly/test/CodeGen/partial_write_in_region_with_loop.ll
+++ b/polly/test/CodeGen/partial_write_in_region_with_loop.ll
@@ -9,7 +9,7 @@
; CHECK:polly.stmt.bb3:
; CHECK-NEXT: %polly.subregion.iv = phi i32 [ %polly.subregion.iv.inc, %polly.stmt.bb5.cont ], [ 0, %polly.stmt.bb3.entry ]
; CHECK-NEXT: %polly.j.0 = phi i64 [ %j.0.phiops.reload, %polly.stmt.bb3.entry ], [ %p_tmp10, %polly.stmt.bb5.cont ]
-; CHECK-NEXT: %8 = zext i64 %polly.indvar to i65
+; CHECK-NEXT: %8 = zext nneg i64 %polly.indvar to i65
; CHECK-NEXT: %9 = add nsw i64 %polly.indvar, -1
; CHECK-NEXT: %10 = zext i64 %9 to i65
; CHECK-NEXT: %11 = mul i65 %8, %10
More information about the llvm-commits
mailing list