[llvm] 7af7064 - Revert "[AArch64] Remove unused ReverseCSRRestoreSeq option. (#82326)"

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 20 10:14:13 PST 2024


Author: Caroline Concatto
Date: 2024-02-20T18:13:33Z
New Revision: 7af70643ca4220c254bdb1e9ea51762228642a10

URL: https://github.com/llvm/llvm-project/commit/7af70643ca4220c254bdb1e9ea51762228642a10
DIFF: https://github.com/llvm/llvm-project/commit/7af70643ca4220c254bdb1e9ea51762228642a10.diff

LOG: Revert "[AArch64] Remove unused ReverseCSRRestoreSeq option. (#82326)"

Patch 3f0404aae7ed2 is breaking some debug builds, so we cannot use the reverse here.

This reverts commit 493f10106f7f1799eb67be95058b251e6a3bf0af.

Added: 
    llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c013bbe9926fef..0e9adde564b3e5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -239,6 +239,11 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                    cl::desc("enable use of redzone on AArch64"),
                                    cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+    ReverseCSRRestoreSeq("reverse-csr-restore-seq",
+                         cl::desc("reverse the CSR restore sequence"),
+                         cl::init(false), cl::Hidden);
+
 static cl::opt<bool> StackTaggingMergeSetTag(
     "stack-tagging-merge-settag",
     cl::desc("merge settag instruction in function epilog"), cl::init(true),
@@ -302,6 +307,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
     return false;
   if (!EnableHomogeneousPrologEpilog)
     return false;
+  if (ReverseCSRRestoreSeq)
+    return false;
   if (EnableRedZone)
     return false;
 
@@ -3104,27 +3111,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
 
   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
 
-  if (homogeneousPrologEpilog(MF, &MBB)) {
-    auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
-                   .setMIFlag(MachineInstr::FrameDestroy);
-    for (auto &RPI : RegPairs) {
-      MIB.addReg(RPI.Reg1, RegState::Define);
-      MIB.addReg(RPI.Reg2, RegState::Define);
-    }
-    return true;
-  }
-
-  // For performance reasons restore SVE register in increasing order
-  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
-  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
-  auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
-  std::reverse(PPRBegin, PPREnd.base());
-  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
-  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
-  auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
-  std::reverse(ZPRBegin, ZPREnd.base());
-
-  for (const RegPairInfo &RPI : RegPairs) {
+  auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator {
     unsigned Reg1 = RPI.Reg1;
     unsigned Reg2 = RPI.Reg2;
 
@@ -3198,6 +3185,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
         MachineMemOperand::MOLoad, Size, Alignment));
     if (NeedsWinCFI)
       InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+
+    return MIB->getIterator();
+  };
+
+  if (homogeneousPrologEpilog(MF, &MBB)) {
+    auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
+                   .setMIFlag(MachineInstr::FrameDestroy);
+    for (auto &RPI : RegPairs) {
+      MIB.addReg(RPI.Reg1, RegState::Define);
+      MIB.addReg(RPI.Reg2, RegState::Define);
+    }
+    return true;
+  }
+
+  // For performance reasons restore SVE register in increasing order
+  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
+  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
+  auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
+  std::reverse(PPRBegin, PPREnd.base());
+  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
+  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
+  auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
+  std::reverse(ZPRBegin, ZPREnd.base());
+
+  if (ReverseCSRRestoreSeq) {
+    MachineBasicBlock::iterator First = MBB.end();
+    for (const RegPairInfo &RPI : reverse(RegPairs)) {
+      MachineBasicBlock::iterator It = EmitMI(RPI);
+      if (First == MBB.end())
+        First = It;
+    }
+    if (First != MBB.end())
+      MBB.splice(MBBI, &MBB, First);
+  } else {
+    for (const RegPairInfo &RPI : RegPairs) {
+      (void)EmitMI(RPI);
+    }
   }
 
   return true;

diff  --git a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
new file mode 100644
index 00000000000000..de4baec50e0c66
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
@@ -0,0 +1,101 @@
+# RUN: llc -run-pass=prologepilog -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
+# RUN: llc -start-before=prologepilog -stop-after=aarch64-ldst-opt -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
+#
+--- |
+
+  define void @foo() nounwind { entry: unreachable }
+
+  define void @bar() nounwind { entry: unreachable }
+
+  define void @baz() nounwind { entry: unreachable }
+
+...
+---
+name:            foo
+# CHECK-LABEL: name: foo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $x19 = IMPLICIT_DEF
+    $x20 = IMPLICIT_DEF
+    $x21 = IMPLICIT_DEF
+    $x22 = IMPLICIT_DEF
+    $x23 = IMPLICIT_DEF
+    $x24 = IMPLICIT_DEF
+    $x25 = IMPLICIT_DEF
+    $x26 = IMPLICIT_DEF
+
+  ; The local stack size is 0, so the last ldp in the sequence will also
+  ; restore the stack.
+  ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 2
+  ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 4
+  ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 6
+
+  ; The ldp and the stack increment get merged even before
+  ; the load-store optimizer.
+  ; CHECK-NEXT: early-clobber $sp, $x26, $x25 = frame-destroy LDPXpost $sp, 8
+
+    RET_ReallyLR
+...
+---
+name:            bar
+# CHECK-LABEL: name: bar
+tracksRegLiveness: true
+stack:
+  - { id : 0, size: 8, alignment: 4,
+  stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+  local-offset: -4, debug-info-variable: '', debug-info-expression: '',
+  debug-info-location: '' }
+
+body:             |
+  bb.0:
+    $x19 = IMPLICIT_DEF
+    $x20 = IMPLICIT_DEF
+    $x21 = IMPLICIT_DEF
+    $x22 = IMPLICIT_DEF
+    $x23 = IMPLICIT_DEF
+    $x24 = IMPLICIT_DEF
+    $x25 = IMPLICIT_DEF
+    $x26 = IMPLICIT_DEF
+
+  ; The local stack size is not 0, and we can combine the CSR stack size with
+  ; the local stack size. This results in rewriting the offsets for all the
+  ; save/restores and prevents us from merging the stack adjustment and the last pop.
+  ; In this case, there is no point in moving the first CSR pair to the end.
+  ; We do it anyway, as it's a small price to pay for the resulting
+  ; simplification in the epilogue emission code.
+  ; CHECK:      $x24, $x23 = frame-destroy LDPXi $sp, 4
+  ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 6
+  ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 8
+  ; CHECK-NEXT: $x26, $x25 = frame-destroy LDPXi $sp, 2
+  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0
+    RET_ReallyLR
+...
+---
+# Check that the load from offset 0 is moved to the end even when hasFP is
+# false.
+name:            baz
+# CHECK-LABEL: name: baz
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+  hasCalls:        true
+body:             |
+  bb.0:
+    successors: %bb.1
+
+    $x0 = IMPLICIT_DEF
+    $x20 = IMPLICIT_DEF
+    $x21 = IMPLICIT_DEF
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    B %bb.1
+
+  bb.1:
+   ; CHECK: $x21, $x20 = frame-destroy LDPXi $sp, 2
+   ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 32
+    RET_ReallyLR
+...


        


More information about the llvm-commits mailing list