[llvm] [polly] [SCEV] Teach SCEVExpander to use zext nneg when possible (PR #70815)

Tue Oct 31 09:32:45 PDT 2023

https://github.com/preames updated https://github.com/llvm/llvm-project/pull/70815

>From 1eb9cfaee12ab363fc566200bbcfec380366e2c1 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 31 Oct 2023 08:00:38 -0700
Subject: [PATCH 1/2] [SCEV] Teach SCEVExpander to use zext nneg when possible

zext nneg was recently added to the IR in #67982.  Teaching SCEVExpander
to emit nneg when possible is valuable since SCEV may have proved
non-trivial facts about loop bounds which would otherwise be lost when
materializing the value.
---
 .../Transforms/Utils/ScalarEvolutionExpander.cpp   |  5 ++++-
 .../Transforms/IRCE/iv-plus-offset-range-check.ll  |  2 +-
 llvm/test/Transforms/IRCE/wide_indvar.ll           | 14 +++++++-------
 .../promote-iv-to-eliminate-casts.ll               |  4 ++--
 llvm/test/Transforms/IndVarSimplify/zext-nuw.ll    |  2 +-
 .../LoopIdiom/X86/memset-size-compute.ll           |  2 +-
 llvm/test/Transforms/LoopVectorize/reduction.ll    |  4 ++--
 7 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5c976d574854283..c8cd9c4ee13e65c 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1293,7 +1293,10 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
 
 Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
   Value *V = expand(S->getOperand());
-  return Builder.CreateZExt(V, S->getType());
+  auto *Res = Builder.CreateZExt(V, S->getType());
+  if (auto *I = dyn_cast<Instruction>(Res))
+    I->setNonNeg(SE.isKnownNonNegative(S->getOperand()));
+  return Res;
 }
 
 Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
diff --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
index bda7cf82eff97a8..9c3713bdd5db0a8 100644
--- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
+++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
@@ -1088,7 +1088,7 @@ define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) {
 ; CHECK-NEXT:    [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[N]] to i16
+; CHECK-NEXT:    [[TMP6:%.*]] = zext nneg i8 [[N]] to i16
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i16 124, [[TMP6]]
 ; CHECK-NEXT:    [[SMIN3:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP7]], i16 0)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc i16 [[SMIN3]] to i8
diff --git a/llvm/test/Transforms/IRCE/wide_indvar.ll b/llvm/test/Transforms/IRCE/wide_indvar.ll
index 415ed5750dcc496..10eea088af4f216 100644
--- a/llvm/test/Transforms/IRCE/wide_indvar.ll
+++ b/llvm/test/Transforms/IRCE/wide_indvar.ll
@@ -125,7 +125,7 @@ define i32 @test_increasing_slt_slt_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6:![0-9]+]]
 ; CHECK-NEXT:    [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7:![0-9]+]]
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smin.i64(i64 [[M]], i64 [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -217,7 +217,7 @@ define i32 @test_increasing_slt_slt_wide_general(ptr %n_ptr, ptr %m_ptr) {
 ; CHECK-NEXT:    [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[SMAX1]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 0)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -309,7 +309,7 @@ define i32 @test_increasing_slt_slt_wide_general_preloop(ptr %n_ptr, ptr %m_ptr)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[M]], [[SMAX1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[SMAX]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 -1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i64 -1, [[EXIT_PRELOOP_AT]]
@@ -456,7 +456,7 @@ define i32 @test_increasing_slt_slt_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
 ; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i64 [[SMAX12]], 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP13]], [[TMP14]]
 ; CHECK-NEXT:    [[SMIN13:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN9]], i64 [[TMP15]])
-; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[SMIN14:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN13]], i64 [[TMP16]])
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN14]], i64 0)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -719,7 +719,7 @@ define i32 @test_increasing_ult_ult_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6]]
 ; CHECK-NEXT:    [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7]]
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[M]], i64 [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -809,7 +809,7 @@ define i32 @test_increasing_ult_ult_wide_general(ptr %n_ptr, ptr %m_ptr) {
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[SMAX]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -918,7 +918,7 @@ define i32 @test_increasing_ult_ult_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i64 [[SMAX7]], 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP9]], [[TMP10]]
 ; CHECK-NEXT:    [[UMIN8:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN5]], i64 [[TMP11]])
-; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN8]], i64 [[TMP12]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 8819df6d013feed..d912540c95657f3 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -230,7 +230,7 @@ define void @promote_latch_condition_decrementing_loop_02(ptr %p, ptr %a) {
 ; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
 ; CHECK-NEXT:    br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loopexit.loopexit:
 ; CHECK-NEXT:    br label [[LOOPEXIT]]
@@ -273,7 +273,7 @@ define void @promote_latch_condition_decrementing_loop_03(ptr %p, ptr %a) {
 ; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
 ; CHECK-NEXT:    br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loopexit.loopexit:
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 01eaa46d981e45b..d24f9a4e40e3895 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -16,7 +16,7 @@ define void @_Z3fn1v() {
 ; CHECK-NEXT:    br label [[DOTPREHEADER4_LR_PH:%.*]]
 ; CHECK:       .preheader4.lr.ph:
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X4]], -1
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index 7aa7e6e69bcb780..ea2cfe74be2649b 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -17,7 +17,7 @@ define void @test(ptr %ptr) {
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
 ; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index aba2532706de5c6..25352ee0991bade 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1200,7 +1200,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -1278,7 +1278,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:

>From b3daadcb50cc1ae0111c9a4cf08c296f3a23e31a Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 31 Oct 2023 09:32:26 -0700
Subject: [PATCH 2/2] Fix polly build failure

---
 polly/test/CodeGen/partial_write_in_region_with_loop.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polly/test/CodeGen/partial_write_in_region_with_loop.ll b/polly/test/CodeGen/partial_write_in_region_with_loop.ll
index 5af09852944b143..48a9dbef21d106a 100644
--- a/polly/test/CodeGen/partial_write_in_region_with_loop.ll
+++ b/polly/test/CodeGen/partial_write_in_region_with_loop.ll
@@ -9,7 +9,7 @@
 ; CHECK:polly.stmt.bb3:
 ; CHECK-NEXT:  %polly.subregion.iv = phi i32 [ %polly.subregion.iv.inc, %polly.stmt.bb5.cont ], [ 0, %polly.stmt.bb3.entry ]
 ; CHECK-NEXT:  %polly.j.0 = phi i64 [ %j.0.phiops.reload, %polly.stmt.bb3.entry ], [ %p_tmp10, %polly.stmt.bb5.cont ]
-; CHECK-NEXT:  %8 = zext i64 %polly.indvar to i65
+; CHECK-NEXT:  %8 = zext nneg i64 %polly.indvar to i65
 ; CHECK-NEXT:  %9 = add nsw i64 %polly.indvar, -1
 ; CHECK-NEXT:  %10 = zext i64 %9 to i65
 ; CHECK-NEXT:  %11 = mul i65 %8, %10