[llvm] [SCEV] BECount to zero if `((-1 + (1 smax %x))<nsw> /u %x)` holds (PR #104580)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 26 00:51:44 PDT 2024


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/104580

>From b348ebe4c274569bb7b9527bd5baeebf4812db84 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 16 Aug 2024 11:51:05 +0200
Subject: [PATCH 1/2] [SCEV] Introduce test for PR104580 (NFC)

---
 .../udiv-of-x-xsmaxone-fold.ll                | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll

diff --git a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
new file mode 100644
index 00000000000000..6c73bfe8596442
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -disable-output -passes="loop-vectorize,print<scalar-evolution>" < %s 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@g_var = external local_unnamed_addr global i32, align 4
+
+define void @test(i32 noundef %x) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Classifying expressions for: @test
+; CHECK-NEXT:    %g_var.promoted = load i32, ptr @g_var, align 4
+; CHECK-NEXT:    --> %g_var.promoted U: full-set S: full-set
+; CHECK-NEXT:    %smax = call i32 @llvm.smax.i32(i32 %x, i32 1)
+; CHECK-NEXT:    --> (1 smax %x) U: [1,-2147483648) S: [1,-2147483648)
+; CHECK-NEXT:    %0 = add nsw i32 %smax, -1
+; CHECK-NEXT:    --> (-1 + (1 smax %x))<nsw> U: [0,2147483647) S: [0,2147483647)
+; CHECK-NEXT:    %1 = udiv i32 %0, %x
+; CHECK-NEXT:    --> ((-1 + (1 smax %x))<nsw> /u %x) U: [0,2147483647) S: [0,2147483647)
+; CHECK-NEXT:    %2 = add nuw nsw i32 %1, 1
+; CHECK-NEXT:    --> (1 + ((-1 + (1 smax %x))<nsw> /u %x))<nuw><nsw> U: [1,-2147483648) S: [1,-2147483648)
+;
+; CHECK-NEXT:  Determining loop execution counts for: @test
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((((-1 * (%x + %bc.resume.val)<nsw>) + (-1 * (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))<nuw><nsw> + (1 smax (%x + %bc.resume.val)<nsw>)) /u (1 umax %x)) + (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 -2147483647
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((((-1 * (%x + %bc.resume.val)<nsw>) + (-1 * (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))<nuw><nsw> + (1 smax (%x + %bc.resume.val)<nsw>)) /u (1 umax %x)) + (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+;
+entry:
+  %g_var.promoted = load i32, ptr @g_var, align 4
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  store i32 %add.lcssa, ptr @g_var, align 4
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+  %add45 = phi i32 [ %g_var.promoted, %entry ], [ %add, %for.body ]
+  %add = add nsw i32 %add45, %x
+  %add1 = add nsw i32 %i.06, %x
+  %cmp = icmp slt i32 %add1, 1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}

>From a91362fbfaf2cb1fe27cd18f815bde3b23ce57a0 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 16 Aug 2024 12:44:20 +0200
Subject: [PATCH 2/2] [SCEV] BECount to zero if `((-1 + (1 smax %x))<nsw> /u
 %x)` holds

The SCEV expression `((-1 + (1 smax %x))<nsw> /u %x)` can be folded
to zero for positive %x, so long as the add node is guaranteed not
to wrap in the signed sense.

Proof: https://alive2.llvm.org/ce/z/9fN_e7.
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 14 +++++++++++
 .../udiv-of-x-xsmaxone-fold.ll                | 25 ++++++++++---------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 21a1c74eefc071..4a40b678972fe6 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3547,6 +3547,20 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
     }
   }
 
+  // ((-1 + (1 smax %x))<nsw> /u %x) evaluates to zero, for any positive %x.
+  if (const auto *AE = dyn_cast<SCEVAddExpr>(LHS);
+      AE && AE->getNumOperands() == 2 && AE->hasNoSignedWrap()) {
+    if (isa<SCEVConstant>(AE->getOperand(0)) &&
+        cast<SCEVConstant>(AE->getOperand(0))->isAllOnesValue()) {
+      const auto *MME = dyn_cast<SCEVSMaxExpr>(AE->getOperand(1));
+      if (MME && MME->getNumOperands() == 2)
+        if (isa<SCEVConstant>(MME->getOperand(0)) &&
+            cast<SCEVConstant>(MME->getOperand(0))->isOne())
+          if (MME->getOperand(1) == RHS)
+            return getZero(LHS->getType());
+    }
+  }
+
   // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
   // changes). Make sure we get a new one.
   IP = nullptr;
diff --git a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
index 6c73bfe8596442..4a680e83f55311 100644
--- a/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
+++ b/llvm/test/Analysis/ScalarEvolution/udiv-of-x-xsmaxone-fold.ll
@@ -10,19 +10,20 @@ define void @test(i32 noundef %x) {
 ; CHECK-NEXT:  Classifying expressions for: @test
 ; CHECK-NEXT:    %g_var.promoted = load i32, ptr @g_var, align 4
 ; CHECK-NEXT:    --> %g_var.promoted U: full-set S: full-set
-; CHECK-NEXT:    %smax = call i32 @llvm.smax.i32(i32 %x, i32 1)
-; CHECK-NEXT:    --> (1 smax %x) U: [1,-2147483648) S: [1,-2147483648)
-; CHECK-NEXT:    %0 = add nsw i32 %smax, -1
-; CHECK-NEXT:    --> (-1 + (1 smax %x))<nsw> U: [0,2147483647) S: [0,2147483647)
-; CHECK-NEXT:    %1 = udiv i32 %0, %x
-; CHECK-NEXT:    --> ((-1 + (1 smax %x))<nsw> /u %x) U: [0,2147483647) S: [0,2147483647)
-; CHECK-NEXT:    %2 = add nuw nsw i32 %1, 1
-; CHECK-NEXT:    --> (1 + ((-1 + (1 smax %x))<nsw> /u %x))<nuw><nsw> U: [1,-2147483648) S: [1,-2147483648)
-;
+; CHECK-NEXT:    %add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT:    --> {(%x + %g_var.promoted),+,%x}<nw><%for.body> U: full-set S: full-set --> (%x + %g_var.promoted) U: full-set S: full-set
+; CHECK-NEXT:    %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+; CHECK-NEXT:    --> {0,+,%x}<nuw><nsw><%for.body> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %add45 = phi i32 [ %g_var.promoted, %entry ], [ %add, %for.body ]
+; CHECK-NEXT:    --> {%g_var.promoted,+,%x}<nsw><%for.body> U: full-set S: full-set Exits: %g_var.promoted LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %add = add nsw i32 %add45, %x
+; CHECK-NEXT:    --> {(%x + %g_var.promoted),+,%x}<nw><%for.body> U: full-set S: full-set Exits: (%x + %g_var.promoted) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %add1 = add nsw i32 %i.06, %x
+; CHECK-NEXT:    --> {%x,+,%x}<nsw><%for.body> U: full-set S: full-set Exits: %x LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test
-; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((((-1 * (%x + %bc.resume.val)<nsw>) + (-1 * (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))<nuw><nsw> + (1 smax (%x + %bc.resume.val)<nsw>)) /u (1 umax %x)) + (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))
-; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 -2147483647
-; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((((-1 * (%x + %bc.resume.val)<nsw>) + (-1 * (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))<nuw><nsw> + (1 smax (%x + %bc.resume.val)<nsw>)) /u (1 umax %x)) + (1 umin ((-1 * (%x + %bc.resume.val)<nsw>) + (1 smax (%x + %bc.resume.val)<nsw>))))
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is i32 0
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 0
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is i32 0
 ; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
 ;
 entry:



More information about the llvm-commits mailing list