[llvm] 2d9c6e6 - [Thumb1] Use callee-saved register to adjust stack pointer
Keith Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 10:30:21 PDT 2023
Author: Keith Walker
Date: 2023-08-17T18:29:50+01:00
New Revision: 2d9c6e699a09d1363e435e6692508dd290984a00
URL: https://github.com/llvm/llvm-project/commit/2d9c6e699a09d1363e435e6692508dd290984a00
DIFF: https://github.com/llvm/llvm-project/commit/2d9c6e699a09d1363e435e6692508dd290984a00.diff
LOG: [Thumb1] Use callee-saved register to adjust stack pointer
When adjusting the stack pointer at the end of the function epilogue,
use one of the callee-saved registers the function has actually saved as
the scratch register, rather than hard-coding R4, which may not have been saved.
Differential Revision: https://reviews.llvm.org/D157500
Added:
Modified:
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/test/CodeGen/ARM/cmse.ll
llvm/test/CodeGen/ARM/thumb1-varalloc.ll
llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
llvm/test/CodeGen/Thumb/callee_save.ll
llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
llvm/test/CodeGen/Thumb/frame-chain.ll
llvm/test/CodeGen/Thumb/large-stack.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c2962c4857c3c0..401398bdd4b10d 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -538,18 +538,30 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
+ // We are likely to need a scratch register and we know all callee-save
+ // registers are free at this point in the epilogue, so pick one.
+ unsigned ScratchRegister = ARM::NoRegister;
+ bool HasFP = hasFP(MF);
+ for (auto &I : MFI.getCalleeSavedInfo()) {
+ Register Reg = I.getReg();
+ if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+ ScratchRegister = Reg;
+ break;
+ }
+ }
+
if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
+ assert(ScratchRegister != ARM::NoRegister &&
"No scratch register to restore SP from FP!");
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes,
TII, *RegInfo, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4)
+ .addReg(ScratchRegister)
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
} else
@@ -558,18 +570,6 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
} else {
- // For a large stack frame, we might need a scratch register to store
- // the size of the frame. We know all callee-save registers are free
- // at this point in the epilogue, so pick one.
- unsigned ScratchRegister = ARM::NoRegister;
- bool HasFP = hasFP(MF);
- for (auto &I : MFI.getCalleeSavedInfo()) {
- Register Reg = I.getReg();
- if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
- ScratchRegister = Reg;
- break;
- }
- }
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
diff --git a/llvm/test/CodeGen/ARM/cmse.ll b/llvm/test/CodeGen/ARM/cmse.ll
index 46e9c86b4a1fff..7ed6556ce1eb7e 100644
--- a/llvm/test/CodeGen/ARM/cmse.ll
+++ b/llvm/test/CodeGen/ARM/cmse.ll
@@ -290,9 +290,9 @@ define void @func5() #4 {
; CHECK-8B-NEXT: mov sp, r4
; CHECK-8B-NEXT: mov r0, sp
; CHECK-8B-NEXT: bl func51
-; CHECK-8B-NEXT: subs r4, r7, #7
-; CHECK-8B-NEXT: subs r4, #1
-; CHECK-8B-NEXT: mov sp, r4
+; CHECK-8B-NEXT: subs r6, r7, #7
+; CHECK-8B-NEXT: subs r6, #1
+; CHECK-8B-NEXT: mov sp, r6
; CHECK-8B-NEXT: pop {r4, r6, r7}
; CHECK-8B-NEXT: pop {r0}
; CHECK-8B-NEXT: mov lr, r0
diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 6c1b5c3614cccb..b7bf254b94d7d2 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -33,9 +33,9 @@ bb2:
bb3:
%.0 = phi ptr [ %0, %entry ], [ %5, %bb2 ], [ %2, %bb1 ]
-; CHECK: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #1
-; CHECK-NEXT: mov sp, r4
+; CHECK: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #1
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r6, r7, pc}
ret ptr %.0
}
diff --git a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
index e659ee984ef4a7..5af4cbe23e0064 100644
--- a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
+++ b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -5,13 +5,16 @@
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, ptr }
define void @t1(ptr %v) {
-; CHECK: push {r4
%tmp6 = load i32, ptr null
%tmp8 = alloca float, i32 %tmp6
store i32 1, ptr null
br label %return
return: ; preds = %0
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK-NEXT: pop
+; CHECK-SAME: [[SCRATCH]]
ret void
}
diff --git a/llvm/test/CodeGen/Thumb/callee_save.ll b/llvm/test/CodeGen/Thumb/callee_save.ll
index b8a5af71e728d6..550542cc2330cb 100644
--- a/llvm/test/CodeGen/Thumb/callee_save.ll
+++ b/llvm/test/CodeGen/Thumb/callee_save.ll
@@ -304,9 +304,9 @@ define <4 x i32> @base_pointer(i32 %a) {
; CHECK-NEXT: movs r1, #2
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: movs r3, #4
-; CHECK-NEXT: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #9
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #9
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r6}
; CHECK-NEXT: mov r8, r4
; CHECK-NEXT: mov r9, r6
diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
index b20e80b8104548..4bbfd916ff4cbc 100644
--- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
+++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -29,9 +29,9 @@ define void @vla_emergency_spill(i32 %n) {
; CHECK-NEXT: ldr r0, [r6]
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #5
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #5
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
@@ -253,9 +253,9 @@ define void @aligned_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr byva
; CHECK-NEXT: ldr r0, [sp]
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #5
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #5
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
@@ -300,9 +300,9 @@ define void @aligned_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr b
; CHECK-NEXT: str r5, [r7, #124]
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #5
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #5
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%y = alloca [4 x i32], align 16
@@ -350,9 +350,9 @@ define void @aligned_out_of_range_access(i32 %n, i32 %n2, i32 %n3, i32 %n4, ptr
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: subs r4, r7, #7
-; CHECK-NEXT: subs r4, #5
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: subs r6, r7, #7
+; CHECK-NEXT: subs r6, #5
+; CHECK-NEXT: mov sp, r6
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%y = alloca [4 x i32], align 16
diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll
index 6c14e103fbf452..c92235e0f8279e 100644
--- a/llvm/test/CodeGen/Thumb/frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb/frame-chain.ll
@@ -151,9 +151,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
; FP-NEXT: movs r1, #0
; FP-NEXT: str r1, [r6, #4]
; FP-NEXT: str r0, [r2]
-; FP-NEXT: subs r4, r7, #7
-; FP-NEXT: subs r4, #1
-; FP-NEXT: mov sp, r4
+; FP-NEXT: subs r6, r7, #7
+; FP-NEXT: subs r6, #1
+; FP-NEXT: mov sp, r6
; FP-NEXT: pop {r4, r6, r7, pc}
;
; FP-AAPCS-LABEL: required_fp:
@@ -185,9 +185,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
; FP-AAPCS-NEXT: movs r1, #0
; FP-AAPCS-NEXT: str r1, [r6, #4]
; FP-AAPCS-NEXT: str r0, [r2]
-; FP-AAPCS-NEXT: mov r4, r11
-; FP-AAPCS-NEXT: subs r4, #8
-; FP-AAPCS-NEXT: mov sp, r4
+; FP-AAPCS-NEXT: mov r6, r11
+; FP-AAPCS-NEXT: subs r6, #8
+; FP-AAPCS-NEXT: mov sp, r6
; FP-AAPCS-NEXT: pop {r4, r6}
; FP-AAPCS-NEXT: pop {r0}
; FP-AAPCS-NEXT: mov r11, r0
@@ -217,9 +217,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
; NOFP-NEXT: movs r1, #0
; NOFP-NEXT: str r1, [r6, #4]
; NOFP-NEXT: str r0, [r2]
-; NOFP-NEXT: subs r4, r7, #7
-; NOFP-NEXT: subs r4, #1
-; NOFP-NEXT: mov sp, r4
+; NOFP-NEXT: subs r6, r7, #7
+; NOFP-NEXT: subs r6, #1
+; NOFP-NEXT: mov sp, r6
; NOFP-NEXT: pop {r4, r6, r7, pc}
;
; NOFP-AAPCS-LABEL: required_fp:
@@ -251,9 +251,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
; NOFP-AAPCS-NEXT: movs r1, #0
; NOFP-AAPCS-NEXT: str r1, [r6, #4]
; NOFP-AAPCS-NEXT: str r0, [r2]
-; NOFP-AAPCS-NEXT: mov r4, r11
-; NOFP-AAPCS-NEXT: subs r4, #8
-; NOFP-AAPCS-NEXT: mov sp, r4
+; NOFP-AAPCS-NEXT: mov r6, r11
+; NOFP-AAPCS-NEXT: subs r6, #8
+; NOFP-AAPCS-NEXT: mov sp, r6
; NOFP-AAPCS-NEXT: pop {r4, r6}
; NOFP-AAPCS-NEXT: pop {r0}
; NOFP-AAPCS-NEXT: mov r11, r0
diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index 1c0b8619bb2fe8..4529204883ab33 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -33,9 +33,11 @@ define void @test100_nofpelim() "frame-pointer"="all" {
; CHECK: sub sp, #508
; CHECK: sub sp, #508
; CHECK: sub sp, #508
-; CHECK: subs r4, r7, #7
-; CHECK: subs r4, #1
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
%tmp = alloca [ 1524 x i8 ] , align 4
ret void
}
@@ -56,9 +58,11 @@ define void @test2_nofpelim() "frame-pointer"="all" {
; CHECK-LABEL: test2_nofpelim{{>?}}:
; CHECK: ldr [[TEMP:r[0-7]]],
; CHECK: add sp, [[TEMP]]
-; CHECK: subs r4, r7, #7
-; CHECK: subs r4, #1
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7, #7
+; CHECK: subs [[SCRATCH]], #1
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
%tmp = alloca [ 1528 x i8 ] , align 4
ret void
}
@@ -85,8 +89,10 @@ define i32 @test3_nofpelim() "frame-pointer"="all" {
; CHECK: add sp, [[TEMP]]
; CHECK: ldr [[TEMP2:r[0-7]]],
; CHECK: add [[TEMP2]], sp
-; CHECK: subs r4, r7,
-; CHECK: mov sp, r4
+; CHECK: subs [[SCRATCH:r[0-7]]], r7,
+; CHECK: mov sp, [[SCRATCH]]
+; CHECK: pop
+; CHECK-SAME: [[SCRATCH]]
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 8
More information about the llvm-commits
mailing list