[llvm] [SCEV] BECount to zero if `((-1 + (1 smax %x))<nsw> /u %x)` holds (PR #104580)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 03:53:15 PDT 2024


https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/104580

The SCEV expression `((-1 + (1 smax %x))<nsw> /u %x)` can be folded to zero for any positive %x, provided the add node is guaranteed not to wrap in the signed sense.

From 9ca24c25ed4a2bd503c0e510a9c7a23f83c39044 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 16 Aug 2024 11:51:05 +0200
Subject: [PATCH 1/2] [SCEV] Introduce test for PR104580 (NFC)

---
 .../udiv-of-x-xsmaxone-fold.ll                | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll

diff --git a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
new file mode 100644
index 00000000000000..a9f6c1e62dfbc3
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -disable-output "-passes=print<scalar-evolution>" < %s 2>&1 | FileCheck %s
+
+@g_var = external local_unnamed_addr global i32, align 4
+
+define void @test(i32 noundef %x) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Classifying expressions for: @test
+; CHECK-NEXT:    %g_var.promoted = load i32, ptr @g_var, align 4
+; CHECK-NEXT:    --> %g_var.promoted U: full-set S: full-set
+; CHECK-NEXT:    %smax = tail call i32 @llvm.smax.i32(i32 %x, i32 1)
+; CHECK-NEXT:    --> (1 smax %x) U: [1,-2147483648) S: [1,-2147483648)
+; CHECK-NEXT:    %0 = add nsw i32 %smax, -1
+; CHECK-NEXT:    --> (-1 + (1 smax %x))<nsw> U: [0,2147483647) S: [0,2147483647)
+; CHECK-NEXT:    %1 = udiv i32 %0, %x
+; CHECK-NEXT:    --> ((-1 + (1 smax %x))<nsw> /u %x) U: [0,2147483647) S: [0,2147483647)
+; CHECK-NEXT:    %2 = add nuw nsw i32 %1, 1
+; CHECK-NEXT:    --> (1 + ((-1 + (1 smax %x))<nsw> /u %x))<nuw><nsw> U: [1,-2147483648) S: [1,-2147483648)
+;
+; CHECK:       Determining loop execution counts for: @test
+; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  %g_var.promoted = load i32, ptr @g_var, align 4
+  %smax = tail call i32 @llvm.smax.i32(i32 %x, i32 1)
+  %0 = add nsw i32 %smax, -1
+  %1 = udiv i32 %0, %x
+  %2 = add nuw nsw i32 %1, 1
+  %min.iters.check = icmp ult i32 %1, 7
+  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
+
+for.body.preheader:
+  %i.06.ph = phi i32 [ 0, %entry ], [ %ind.end, %middle.block ]
+  %add45.ph = phi i32 [ %g_var.promoted, %entry ], [ %7, %middle.block ]
+  br label %for.body
+
+vector.ph:
+  %n.vec = and i32 %2, -8
+  %ind.end = mul i32 %n.vec, %x
+  %3 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %g_var.promoted, i64 0
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i64 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi <4 x i32> [ %3, %vector.ph ], [ %4, %vector.body ]
+  %vec.phi7 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
+  %4 = add <4 x i32> %vec.phi, %broadcast.splat
+  %5 = add <4 x i32> %vec.phi7, %broadcast.splat
+  %index.next = add nuw i32 %index, 8
+  %6 = icmp eq i32 %index.next, %n.vec
+  br i1 %6, label %middle.block, label %vector.body
+
+middle.block:
+  %bin.rdx = add <4 x i32> %5, %4
+  %7 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
+  %cmp.n = icmp eq i32 %2, %n.vec
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
+
+for.cond.cleanup:
+  %add.lcssa = phi i32 [ %7, %middle.block ], [ %add, %for.body ]
+  store i32 %add.lcssa, ptr @g_var, align 4
+  ret void
+
+for.body:
+  %i.06 = phi i32 [ %add1, %for.body ], [ %i.06.ph, %for.body.preheader ]
+  %add45 = phi i32 [ %add, %for.body ], [ %add45.ph, %for.body.preheader ]
+  %add = add nsw i32 %add45, %x
+  %add1 = add nsw i32 %i.06, %x
+  %cmp = icmp slt i32 %add1, 1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

From f5415d86e626ebf0618c72fb95753556b43dd094 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 16 Aug 2024 12:44:20 +0200
Subject: [PATCH 2/2] [SCEV] BECount to zero if `((-1 + (1 smax %x))<nsw> /u
 %x)` holds

The SCEV expression `((-1 + (1 smax %x))<nsw> /u %x)` can be folded
to zero for any positive %x, provided that the add node is guaranteed
not to wrap in the signed sense.
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 19 +++++++++++++++++++
 .../udiv-of-x-xsmaxone-fold.ll                |  4 ++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 487844f000ac69..d046fb4f2e7a99 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3547,6 +3547,25 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
     }
   }
 
+  // ((-1 + (1 smax %x))<nsw> /u %x) folds to zero: the numerator is %x - 1
+  // for positive %x and 0 otherwise, which is always unsigned-less-than a
+  // non-zero %x. Match the smax operands directly against 1 and RHS;
+  // hasOperand() searches the whole expression tree and would also accept
+  // e.g. (1 smax (1 + %x)), whose quotient is 1 for %x == 1.
+  if (const auto *AE = dyn_cast<SCEVAddExpr>(LHS);
+      AE && AE->getNumOperands() == 2 && AE->hasNoSignedWrap()) {
+    const SCEV *MinusOne = AE->getOperand(0);
+    const SCEV *SMax = AE->getOperand(1);
+    if (SMax->isAllOnesValue())
+      std::swap(MinusOne, SMax);
+    const auto *SME = dyn_cast<SCEVSMaxExpr>(SMax);
+    if (MinusOne->isAllOnesValue() && SME && SME->getNumOperands() == 2 &&
+        ((SME->getOperand(0)->isOne() && SME->getOperand(1) == RHS) ||
+         (SME->getOperand(1)->isOne() && SME->getOperand(0) == RHS)))
+      // The division itself is zero; this is not specific to BE counts.
+      return getZero(LHS->getType());
+  }
+
   // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
   // changes). Make sure we get a new one.
   IP = nullptr;
diff --git a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
index a9f6c1e62dfbc3..a884a66dcf5d3c 100644
--- a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
+++ b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
@@ -13,9 +13,9 @@ define void @test(i32 noundef %x) {
 ; CHECK-NEXT:    %0 = add nsw i32 %smax, -1
 ; CHECK-NEXT:    --> (-1 + (1 smax %x))<nsw> U: [0,2147483647) S: [0,2147483647)
 ; CHECK-NEXT:    %1 = udiv i32 %0, %x
-; CHECK-NEXT:    --> ((-1 + (1 smax %x))<nsw> /u %x) U: [0,2147483647) S: [0,2147483647)
+; CHECK-NEXT:    --> 0 U: [0,1) S: [0,1)
 ; CHECK-NEXT:    %2 = add nuw nsw i32 %1, 1
-; CHECK-NEXT:    --> (1 + ((-1 + (1 smax %x))<nsw> /u %x))<nuw><nsw> U: [1,-2147483648) S: [1,-2147483648)
+; CHECK-NEXT:    --> 1 U: [1,2) S: [1,2)
 ;
 ; CHECK:       Determining loop execution counts for: @test
 ; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.



More information about the llvm-commits mailing list