[llvm] [AArch64LoadStoreOpt] BaseReg update is searched also in CF successor (PR #145583)

Sergey Shcherbinin via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 1 09:45:50 PDT 2025


https://github.com/SergeyShch01 updated https://github.com/llvm/llvm-project/pull/145583

>From ecdf7ed0aea67097c4587c791ea65f548e317140 Mon Sep 17 00:00:00 2001
From: Sergey Shcherbinin <sscherbinin at nvidia.com>
Date: Tue, 24 Jun 2025 23:13:12 +0400
Subject: [PATCH] [AArch64LoadStoreOpt] Look for the register update
 instruction (to merge with the memory instruction into pre/post-increment
 form) not only inside a single MBB but also along a downward control-flow
 path without side entries, such that BaseReg is live along the path but not
 at its exits. The regression test is updated accordingly.

---
 .../AArch64/AArch64LoadStoreOptimizer.cpp     | 79 +++++++++++++------
 .../LoopStrengthReduce/AArch64/pr53625.ll     |  3 +-
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dde4e7ab0e890..8c44698c088da 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -2529,30 +2529,63 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
     return E;
   }
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit;
-       MBBI = next_nodbg(MBBI, E)) {
-    MachineInstr &MI = *MBBI;
-
-    // Don't count transient instructions towards the search limit since there
-    // may be different numbers of them if e.g. debug information is present.
-    if (!MI.isTransient())
-      ++Count;
-
-    // If we found a match, return it.
-    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
-      return MBBI;
-
-    // Update the status of what the instruction clobbered and used.
-    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  MachineBasicBlock *CurMBB = I->getParent();
+  // The choice of the next block to visit is based on block live-ins.
+  bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
+
+  while (true) {
+    MachineBasicBlock::iterator CurEnd = CurMBB->end();
+
+    for (unsigned Count = 0; MBBI != CurEnd && Count < Limit;
+         MBBI = next_nodbg(MBBI, CurEnd)) {
+      MachineInstr &MI = *MBBI;
+
+      // Don't count transient instructions towards the search limit since there
+      // may be different numbers of them if e.g. debug information is present.
+      if (!MI.isTransient())
+        ++Count;
+
+      // If we found a match, return it.
+      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
+        return MBBI;
+
+      // Update the status of what the instruction clobbered and used.
+      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                        TRI);
+
+      // Otherwise, if the base register is used or modified, we have no match,
+      // so return early. If we are optimizing SP, do not allow instructions
+      // that may load or store in between the load and the optimized value
+      // update.
+      if (!ModifiedRegUnits.available(BaseReg) ||
+          !UsedRegUnits.available(BaseReg) ||
+          (BaseRegSP && MBBI->mayLoadOrStore()))
+        return E;
+    }
 
-    // Otherwise, if the base register is used or modified, we have no match, so
-    // return early.
-    // If we are optimizing SP, do not allow instructions that may load or store
-    // in between the load and the optimized value update.
-    if (!ModifiedRegUnits.available(BaseReg) ||
-        !UsedRegUnits.available(BaseReg) ||
-        (BaseRegSP && MBBI->mayLoadOrStore()))
-      return E;
+    if (VisitSucc) {
+      // Try to continue downward into a successor along a control-flow path
+      // without side entries, where BaseReg is live along it but not at exits.
+      MachineBasicBlock *SuccToVisit = nullptr;
+      unsigned LiveSuccCount = 0;
+      for (MachineBasicBlock *Succ : CurMBB->successors()) {
+        if (Succ->isLiveIn(BaseReg)) {
+          if (LiveSuccCount++) {
+            return E;
+          }
+          if (Succ->pred_size() == 1) {
+            SuccToVisit = Succ;
+          }
+        }
+      }
+      if (!SuccToVisit) {
+        break;
+      }
+      CurMBB = SuccToVisit;
+      MBBI = CurMBB->begin();
+    } else {
+      break;
+    }
   }
   return E;
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
index 3c1094f2ee31d..ff2527d5bb6ad 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -131,12 +131,11 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:  .LBB2_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr w9, [x1]
+; CHECK-NEXT:    ldr w9, [x1], #4
 ; CHECK-NEXT:    cbnz w9, .LBB2_5
 ; CHECK-NEXT:  // %bb.3: // %for.cond
 ; CHECK-NEXT:    // in Loop: Header=BB2_2 Depth=1
 ; CHECK-NEXT:    subs x8, x8, #1
-; CHECK-NEXT:    add x1, x1, #4
 ; CHECK-NEXT:    b.ne .LBB2_2
 ; CHECK-NEXT:  .LBB2_4:
 ; CHECK-NEXT:    mov w0, wzr



More information about the llvm-commits mailing list