[llvm] 2d9c6e6 - [Thumb1] Use callee-saved register to adjust stack pointer

Keith Walker via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 17 10:30:21 PDT 2023


Author: Keith Walker
Date: 2023-08-17T18:29:50+01:00
New Revision: 2d9c6e699a09d1363e435e6692508dd290984a00

URL: https://github.com/llvm/llvm-project/commit/2d9c6e699a09d1363e435e6692508dd290984a00
DIFF: https://github.com/llvm/llvm-project/commit/2d9c6e699a09d1363e435e6692508dd290984a00.diff

LOG: [Thumb1] Use callee-saved register to adjust stack pointer

When adjusting the stack pointer at the end of the function epilogue
(restoring SP from the frame pointer), use a callee-saved register as the
scratch register rather than hard-coding R4, which may not have been saved.

Differential Revision: https://reviews.llvm.org/D157500

Added: 
    

Modified: 
    llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
    llvm/test/CodeGen/ARM/cmse.ll
    llvm/test/CodeGen/ARM/thumb1-varalloc.ll
    llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
    llvm/test/CodeGen/Thumb/callee_save.ll
    llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
    llvm/test/CodeGen/Thumb/frame-chain.ll
    llvm/test/CodeGen/Thumb/large-stack.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c2962c4857c3c0..401398bdd4b10d 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -538,18 +538,30 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                  AFI->getDPRCalleeSavedAreaSize() +
                  ArgRegsSaveSize);
 
+    // We are likely to need a scratch register and we know all callee-save
+    // registers are free at this point in the epilogue, so pick one.
+    unsigned ScratchRegister = ARM::NoRegister;
+    bool HasFP = hasFP(MF);
+    for (auto &I : MFI.getCalleeSavedInfo()) {
+      Register Reg = I.getReg();
+      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+        ScratchRegister = Reg;
+        break;
+      }
+    }
+
     if (AFI->shouldRestoreSPFromFP()) {
       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
       // Reset SP based on frame pointer only if the stack frame extends beyond
       // frame pointer stack slot, the target is ELF and the function has FP, or
       // the target uses var sized objects.
       if (NumBytes) {
-        assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
+        assert(ScratchRegister != ARM::NoRegister &&
                "No scratch register to restore SP from FP!");
-        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+        emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes,
                                   TII, *RegInfo, MachineInstr::FrameDestroy);
         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
-            .addReg(ARM::R4)
+            .addReg(ScratchRegister)
             .add(predOps(ARMCC::AL))
             .setMIFlag(MachineInstr::FrameDestroy);
       } else
@@ -558,18 +570,6 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
             .add(predOps(ARMCC::AL))
             .setMIFlag(MachineInstr::FrameDestroy);
     } else {
-      // For a large stack frame, we might need a scratch register to store
-      // the size of the frame.  We know all callee-save registers are free
-      // at this point in the epilogue, so pick one.
-      unsigned ScratchRegister = ARM::NoRegister;
-      bool HasFP = hasFP(MF);
-      for (auto &I : MFI.getCalleeSavedInfo()) {
-        Register Reg = I.getReg();
-        if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
-          ScratchRegister = Reg;
-          break;
-        }
-      }
       if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
           &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
         MachineBasicBlock::iterator PMBBI = std::prev(MBBI);

diff  --git a/llvm/test/CodeGen/ARM/cmse.ll b/llvm/test/CodeGen/ARM/cmse.ll
index 46e9c86b4a1fff..7ed6556ce1eb7e 100644
--- a/llvm/test/CodeGen/ARM/cmse.ll
+++ b/llvm/test/CodeGen/ARM/cmse.ll
@@ -290,9 +290,9 @@ define void @func5() #4 {
 ; CHECK-8B-NEXT:    mov sp, r4
 ; CHECK-8B-NEXT:    mov r0, sp
 ; CHECK-8B-NEXT:    bl func51
-; CHECK-8B-NEXT:    subs r4, r7, #7
-; CHECK-8B-NEXT:    subs r4, #1
-; CHECK-8B-NEXT:    mov sp, r4
+; CHECK-8B-NEXT:    subs r6, r7, #7
+; CHECK-8B-NEXT:    subs r6, #1
+; CHECK-8B-NEXT:    mov sp, r6
 ; CHECK-8B-NEXT:    pop {r4, r6, r7}
 ; CHECK-8B-NEXT:    pop {r0}
 ; CHECK-8B-NEXT:    mov lr, r0

diff  --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 6c1b5c3614cccb..b7bf254b94d7d2 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -33,9 +33,9 @@ bb2:
 	
 bb3:
 	%.0 = phi ptr [ %0, %entry ], [ %5, %bb2 ], [ %2, %bb1 ]
-; CHECK:      subs    r4, r7, #7
-; CHECK-NEXT: subs    r4, #1
-; CHECK-NEXT: mov     sp, r4
+; CHECK:      subs    r6, r7, #7
+; CHECK-NEXT: subs    r6, #1
+; CHECK-NEXT: mov     sp, r6
 ; CHECK-NEXT: pop     {r4, r6, r7, pc}
 	ret ptr %.0
 }

diff  --git a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
index e659ee984ef4a7..5af4cbe23e0064 100644
--- a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
+++ b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -5,13 +5,16 @@
 %struct.info = type { i32, i32, i32, i32, i32, i32, i32, ptr }
 
 define void @t1(ptr %v) {
-; CHECK: push {r4
   %tmp6 = load i32, ptr null
   %tmp8 = alloca float, i32 %tmp6
   store i32 1, ptr null
   br label %return
 
 return:                                           ; preds = %0
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK-NEXT: pop
+; CHECK-SAME: [[SCRATCH]]
   ret void
 }

diff  --git a/llvm/test/CodeGen/Thumb/callee_save.ll b/llvm/test/CodeGen/Thumb/callee_save.ll
index b8a5af71e728d6..550542cc2330cb 100644
--- a/llvm/test/CodeGen/Thumb/callee_save.ll
+++ b/llvm/test/CodeGen/Thumb/callee_save.ll
@@ -304,9 +304,9 @@ define <4 x i32> @base_pointer(i32 %a) {
 ; CHECK-NEXT:    movs r1, #2
 ; CHECK-NEXT:    movs r2, #3
 ; CHECK-NEXT:    movs r3, #4
-; CHECK-NEXT:    subs r4, r7, #7
-; CHECK-NEXT:    subs r4, #9
-; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    subs r6, r7, #7
+; CHECK-NEXT:    subs r6, #9
+; CHECK-NEXT:    mov sp, r6
 ; CHECK-NEXT:    pop {r4, r6}
 ; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    mov r9, r6

diff  --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
index b20e80b8104548..4bbfd916ff4cbc 100644
--- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
+++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -29,9 +29,9 @@ define void @vla_emergency_spill(i32 %n) {
 ; CHECK-NEXT:    ldr r0, [r6]
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    subs r4, r7, #7
-; CHECK-NEXT:    subs r4, #5
-; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    subs r6, r7, #7
+; CHECK-NEXT:    subs r6, #5
+; CHECK-NEXT:    mov sp, r6
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
@@ -253,9 +253,9 @@ define void @aligned_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr byva
 ; CHECK-NEXT:    ldr r0, [sp]
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    subs r4, r7, #7
-; CHECK-NEXT:    subs r4, #5
-; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    subs r6, r7, #7
+; CHECK-NEXT:    subs r6, #5
+; CHECK-NEXT:    mov sp, r6
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
@@ -300,9 +300,9 @@ define void @aligned_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr b
 ; CHECK-NEXT:    str r5, [r7, #124]
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    subs r4, r7, #7
-; CHECK-NEXT:    subs r4, #5
-; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    subs r6, r7, #7
+; CHECK-NEXT:    subs r6, #5
+; CHECK-NEXT:    mov sp, r6
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %y = alloca [4 x i32], align 16
@@ -350,9 +350,9 @@ define void @aligned_out_of_range_access(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr
 ; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    subs r4, r7, #7
-; CHECK-NEXT:    subs r4, #5
-; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    subs r6, r7, #7
+; CHECK-NEXT:    subs r6, #5
+; CHECK-NEXT:    mov sp, r6
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %y = alloca [4 x i32], align 16

diff  --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll
index 6c14e103fbf452..c92235e0f8279e 100644
--- a/llvm/test/CodeGen/Thumb/frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb/frame-chain.ll
@@ -151,9 +151,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; FP-NEXT:    movs r1, #0
 ; FP-NEXT:    str r1, [r6, #4]
 ; FP-NEXT:    str r0, [r2]
-; FP-NEXT:    subs r4, r7, #7
-; FP-NEXT:    subs r4, #1
-; FP-NEXT:    mov sp, r4
+; FP-NEXT:    subs r6, r7, #7
+; FP-NEXT:    subs r6, #1
+; FP-NEXT:    mov sp, r6
 ; FP-NEXT:    pop {r4, r6, r7, pc}
 ;
 ; FP-AAPCS-LABEL: required_fp:
@@ -185,9 +185,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; FP-AAPCS-NEXT:    movs r1, #0
 ; FP-AAPCS-NEXT:    str r1, [r6, #4]
 ; FP-AAPCS-NEXT:    str r0, [r2]
-; FP-AAPCS-NEXT:    mov r4, r11
-; FP-AAPCS-NEXT:    subs r4, #8
-; FP-AAPCS-NEXT:    mov sp, r4
+; FP-AAPCS-NEXT:    mov r6, r11
+; FP-AAPCS-NEXT:    subs r6, #8
+; FP-AAPCS-NEXT:    mov sp, r6
 ; FP-AAPCS-NEXT:    pop {r4, r6}
 ; FP-AAPCS-NEXT:    pop {r0}
 ; FP-AAPCS-NEXT:    mov r11, r0
@@ -217,9 +217,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; NOFP-NEXT:    movs r1, #0
 ; NOFP-NEXT:    str r1, [r6, #4]
 ; NOFP-NEXT:    str r0, [r2]
-; NOFP-NEXT:    subs r4, r7, #7
-; NOFP-NEXT:    subs r4, #1
-; NOFP-NEXT:    mov sp, r4
+; NOFP-NEXT:    subs r6, r7, #7
+; NOFP-NEXT:    subs r6, #1
+; NOFP-NEXT:    mov sp, r6
 ; NOFP-NEXT:    pop {r4, r6, r7, pc}
 ;
 ; NOFP-AAPCS-LABEL: required_fp:
@@ -251,9 +251,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; NOFP-AAPCS-NEXT:    movs r1, #0
 ; NOFP-AAPCS-NEXT:    str r1, [r6, #4]
 ; NOFP-AAPCS-NEXT:    str r0, [r2]
-; NOFP-AAPCS-NEXT:    mov r4, r11
-; NOFP-AAPCS-NEXT:    subs r4, #8
-; NOFP-AAPCS-NEXT:    mov sp, r4
+; NOFP-AAPCS-NEXT:    mov r6, r11
+; NOFP-AAPCS-NEXT:    subs r6, #8
+; NOFP-AAPCS-NEXT:    mov sp, r6
 ; NOFP-AAPCS-NEXT:    pop {r4, r6}
 ; NOFP-AAPCS-NEXT:    pop {r0}
 ; NOFP-AAPCS-NEXT:    mov r11, r0

diff  --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index 1c0b8619bb2fe8..4529204883ab33 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -33,9 +33,11 @@ define void @test100_nofpelim() "frame-pointer"="all" {
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
-; CHECK: subs r4, r7, #7
-; CHECK: subs r4, #1
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
     %tmp = alloca [ 1524 x i8 ] , align 4
     ret void
 }
@@ -56,9 +58,11 @@ define void @test2_nofpelim() "frame-pointer"="all" {
 ; CHECK-LABEL: test2_nofpelim{{>?}}:
 ; CHECK: ldr [[TEMP:r[0-7]]],
 ; CHECK: add sp, [[TEMP]]
-; CHECK: subs r4, r7, #7
-; CHECK: subs r4, #1
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
     %tmp = alloca [ 1528 x i8 ] , align 4
     ret void
 }
@@ -85,8 +89,10 @@ define i32 @test3_nofpelim() "frame-pointer"="all" {
 ; CHECK: add sp, [[TEMP]]
 ; CHECK: ldr [[TEMP2:r[0-7]]],
 ; CHECK: add [[TEMP2]], sp
-; CHECK: subs r4, r7,
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7,
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 8


        


More information about the llvm-commits mailing list