[llvm] [loop-idiom] Forget outer loop scev when loop-idiom introduces memset… (PR #150916)

Sushant Gokhale via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 29 03:46:37 PDT 2025


https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/150916

>From b3fc9c68e31c264f4e53a973c97bdef126e679be Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Mon, 28 Jul 2025 02:48:10 -0700
Subject: [PATCH 1/3] [loop-idiom] Forget outer loop scev when loop-idiom
 introduces memset in the outer loop

Before loop-idiom pass, we have a store into the inner loop which is
considered simple and one that does not have any side effects on the
loop. Post loop-idiom pass, we get a memset into the outer loop that is
considered to introduce side effects on the loop. This changes the
backedge taken count before and after the pass and hence, the crash with
verify-scev.
Ideally, I would expect store to be considered as an instruction that
introduces side effects like the memset but this generates lot of test
failures.

Fixes #149377
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  |  5 ++
 .../AArch64/introduce-memset-in-outerloop.ll  | 48 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 03b92d3338a98..7cee8a7525c42 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1154,6 +1154,11 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
   NewCall->setDebugLoc(TheStore->getDebugLoc());
 
+  auto *MemsetLoop = LI->getLoopFor(NewCall->getParent());
+  // FIXME: We should invalidate scev only if the loop already does not have any
+  // side effects.
+  SE->forgetLoop(MemsetLoop);
+
   if (MSSAU) {
     MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
         NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll b/llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll
new file mode 100644
index 0000000000000..924db26cd573e
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll
@@ -0,0 +1,48 @@
+; REQUIRES: asserts
+; RUN: opt <%s -p "loop(loop-idiom)" -verify-scev -o /dev/null
+
+
+; IR corresponds to the following C test:
+; extern char a[];
+; void foo() {
+;   for (long c = 0; c < 6; c += -2078836808675943215)
+;     for (long d; d < 6; d++)
+;       a[c + d] = 0;
+; }
+
+; Make sure we do not crash when verifying scev.
+
+ at a = external global [0 x i8]
+
+define void @foo()  {
+entry:
+  br label %outerL
+
+outerL:                              ; preds = %entry, %outerLatch
+  %e = phi i64 [ undef, %entry ], [ %lcssa, %outerLatch ]
+  %c = phi i64 [ 0, %entry ], [ %c.next, %outerLatch ]
+  %e.cmp = icmp slt i64 %e, 6
+  br i1 %e.cmp, label %innerL, label %outerLatch
+
+innerL:                                          ; preds = %outerL, %innerL
+  %d = phi i64 [ %d.next, %innerL ], [ %e, %outerL ]
+  %add = add nsw i64 %d, %c
+  %arrayidx = getelementptr inbounds [0 x i8], ptr @a, i64 0, i64 %add
+  store i8 0, ptr %arrayidx
+  %d.next = add nsw i64 %d, 1
+  %d.cmp = icmp slt i64 %d, 5
+  br i1 %d.cmp, label %innerL, label %outerLatch, !llvm.loop !0
+
+outerLatch:                                         ; preds = %innerL, %outerL
+  %lcssa = phi i64 [ %e, %outerL ], [ %d.next, %innerL ]
+  %c.next = add nsw i64 %c, -2078836808675943215
+  %c.cmp = icmp slt i64 %c, 2078836808675943221
+  br i1 %c.cmp, label %outerL, label %exit, !llvm.loop !1
+
+exit:                                         ; preds = %outerLatch
+  ret void
+}
+
+!0 = distinct !{!0, !2}
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.mustprogress"}

>From c69ec2581229685eb778f10b303a703b8a87f078 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Tue, 29 Jul 2025 03:22:58 -0700
Subject: [PATCH 2/3] address comments

---
 llvm/lib/Analysis/ScalarEvolution.cpp                    | 8 ++++++++
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp        | 5 -----
 .../{AArch64 => }/introduce-memset-in-outerloop.ll       | 9 ++++-----
 3 files changed, 12 insertions(+), 10 deletions(-)
 rename llvm/test/Transforms/LoopIdiom/{AArch64 => }/introduce-memset-in-outerloop.ll (82%)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 0990a0daac80c..bbfbf8c02b3ab 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -117,6 +117,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/ModRef.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -7421,6 +7422,13 @@ ScalarEvolution::getLoopProperties(const Loop *L) {
       if (auto *SI = dyn_cast<StoreInst>(I))
         return !SI->isSimple();
 
+      // Check if the function accesses inaccessible memory.
+      if (auto *CI = dyn_cast<CallInst>(I)) {
+        auto ME = CI->getMemoryEffects();
+        if (!isModSet(ME.getModRef(IRMemLocation::InaccessibleMem)))
+          return false;
+      }
+
       return I->mayThrow() || I->mayWriteToMemory();
     };
 
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 7cee8a7525c42..03b92d3338a98 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1154,11 +1154,6 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
   NewCall->setDebugLoc(TheStore->getDebugLoc());
 
-  auto *MemsetLoop = LI->getLoopFor(NewCall->getParent());
-  // FIXME: We should invalidate scev only if the loop already does not have any
-  // side effects.
-  SE->forgetLoop(MemsetLoop);
-
   if (MSSAU) {
     MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
         NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll b/llvm/test/Transforms/LoopIdiom/introduce-memset-in-outerloop.ll
similarity index 82%
rename from llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll
rename to llvm/test/Transforms/LoopIdiom/introduce-memset-in-outerloop.ll
index 924db26cd573e..bf78ebae3dc3d 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/introduce-memset-in-outerloop.ll
+++ b/llvm/test/Transforms/LoopIdiom/introduce-memset-in-outerloop.ll
@@ -1,6 +1,7 @@
-; REQUIRES: asserts
-; RUN: opt <%s -p "loop(loop-idiom)" -verify-scev -o /dev/null
+; RUN: opt <%s -p "print<scalar-evolution>" -disable-output 2>&1 | FileCheck %s
+; RUN: opt <%s -p "loop(loop-idiom),print<scalar-evolution>" -disable-output 2>&1 | FileCheck %s
 
+; CHECK: backedge-taken count is i64 1
 
 ; IR corresponds to the following C test:
 ; extern char a[];
@@ -10,8 +11,6 @@
 ;       a[c + d] = 0;
 ; }
 
-; Make sure we do not crash when verifying scev.
-
 @a = external global [0 x i8]
 
 define void @foo()  {
@@ -19,7 +18,7 @@ entry:
   br label %outerL
 
 outerL:                              ; preds = %entry, %outerLatch
-  %e = phi i64 [ undef, %entry ], [ %lcssa, %outerLatch ]
+  %e = phi i64 [ poison, %entry ], [ %lcssa, %outerLatch ]
   %c = phi i64 [ 0, %entry ], [ %c.next, %outerLatch ]
   %e.cmp = icmp slt i64 %e, 6
   br i1 %e.cmp, label %innerL, label %outerLatch

>From f55bab0dfd3bd5cf69ab8d16fc027e21a1bf40c2 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Tue, 29 Jul 2025 03:44:17 -0700
Subject: [PATCH 3/3] address comments

---
 llvm/lib/Analysis/ScalarEvolution.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index bbfbf8c02b3ab..4dd52ac944bc2 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7422,6 +7422,9 @@ ScalarEvolution::getLoopProperties(const Loop *L) {
       if (auto *SI = dyn_cast<StoreInst>(I))
         return !SI->isSimple();
 
+      if (I->mayThrow())
+        return true;
+
       // Check if the function accesses inaccessible memory.
       if (auto *CI = dyn_cast<CallInst>(I)) {
         auto ME = CI->getMemoryEffects();
@@ -7429,7 +7432,7 @@ ScalarEvolution::getLoopProperties(const Loop *L) {
           return false;
       }
 
-      return I->mayThrow() || I->mayWriteToMemory();
+      return I->mayWriteToMemory();
     };
 
     LoopProperties LP = {/* HasNoAbnormalExits */ true,



More information about the llvm-commits mailing list