[llvm] [ARM] Optimise non-ABI frame pointers (PR #110286)

Oliver Stannard via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 17 04:54:46 PDT 2024


https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/110286

From a75ebc81bea16a7cd0da2bf4ffed4943e7a68a6a Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 13:31:38 +0100
Subject: [PATCH 1/2] [ARM] Re-generate PACBTI tests using update_llc_checks.py

---
 llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll |  45 +++--
 llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll    | 148 +++++++++------
 .../Thumb2/pacbti-m-indirect-tail-call.ll     |  39 ++--
 .../CodeGen/Thumb2/pacbti-m-outliner-3.ll     | 124 +++++++------
 .../CodeGen/Thumb2/pacbti-m-outliner-4.ll     | 172 +++++++++++-------
 .../test/CodeGen/Thumb2/pacbti-m-overalign.ll |  60 +++---
 .../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll |  77 +++++---
 .../test/CodeGen/Thumb2/pacbti-m-varargs-2.ll |  79 +++++---
 llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll      | 113 +++++++++---
 9 files changed, 546 insertions(+), 311 deletions(-)

diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
index c309d992b95a5e..c2a2ed2d0c8e8a 100644
--- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
@@ -1,9 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabi"
 
+; Check the function starts with `pacbti` and correct unwind info is emitted
 define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" {
+; CHECK-LABEL: _Z1fi:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pacbti r12, lr, sp
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    bl _Z1gi
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %add = add nsw i32 %x, 1
   %call = tail call i32 @_Z1gi(i32 %add)
@@ -13,24 +40,6 @@ entry:
 
 declare dso_local i32 @_Z1gi(i32)
 
-; Check the function starts with `pacbti` and correct unwind info is emitted
-; CHECK-LABEL: _Z1fi:
-; ...
-; CHECK:       pacbti   r12, lr, sp
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT:  push     {r7, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 8
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r7, -8
-; CHECK-NEXT:  .save   {ra_auth_code}
-; CHECK-NEXT:  str     r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:  .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; ...
-
 ; UNWIND-LABEL: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
index 0ae46cb8879ee0..64c5a6c7030145 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -16,6 +17,31 @@ target triple = "thumbv8m.main-none-none-eabi"
 ; }
 
 define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: f0:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
@@ -23,27 +49,32 @@ entry:
   ret i32 %add
 }
 
-; CHECK-LABEL: f0:
-; CHECK:       pac     r12, lr, sp
-; CHECK-NEXT:  .save   {r7, lr}
-; CHECK-NEXT:  push    {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save   {ra_auth_code}
-; CHECK-NEXT:  str     r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; ...
-; CHECK:       add     sp, #4
-; CHECK-NEXT:  ldr     r12, [sp], #4
-; CHECK-NEXT:  pop.w   {r7, lr}
-; CHECK-NEXT:  aut     r12, lr, sp
-; CHECK-NEXT:  bx      lr
-
 define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    vstr fpcxtns, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset lr, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-NEXT:    vldr fpcxtns, [sp], #4
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    clrm {r1, r2, r3, r12, apsr}
+; CHECK-NEXT:    bxns lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
@@ -51,44 +82,59 @@ entry:
   ret i32 %add
 }
 
-; CHECK-LABEL: f1:
-; CHECK:       pac     r12, lr, sp
-; CHECK-NEXT:  vstr    fpcxtns, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 4
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT:  push    {r7, lr}
-; CHECK:       vldr    fpcxtns, [sp], #4
-; CHECK:       aut     r12, lr, sp
-
 define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
+; CHECK-LABEL: f2:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    mrs r12, control
+; CHECK-NEXT:    tst.w r12, #8
+; CHECK-NEXT:    beq .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %entry
+; CHECK-NEXT:    vmrs r12, fpscr
+; CHECK-NEXT:    vmov d0, lr, lr
+; CHECK-NEXT:    vmov d1, lr, lr
+; CHECK-NEXT:    vmov d2, lr, lr
+; CHECK-NEXT:    vmov d3, lr, lr
+; CHECK-NEXT:    vmov d4, lr, lr
+; CHECK-NEXT:    vmov d5, lr, lr
+; CHECK-NEXT:    vmov d6, lr, lr
+; CHECK-NEXT:    vmov d7, lr, lr
+; CHECK-NEXT:    bic r12, r12, #159
+; CHECK-NEXT:    bic r12, r12, #4026531840
+; CHECK-NEXT:    vmsr fpscr, r12
+; CHECK-NEXT:  .LBB2_2: @ %entry
+; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    mov r2, lr
+; CHECK-NEXT:    mov r3, lr
+; CHECK-NEXT:    mov r12, lr
+; CHECK-NEXT:    msr apsr_nzcvq, lr
+; CHECK-NEXT:    bxns lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
   %add = add nsw i32 %call, 1
   ret i32 %add
 }
-; CHECK-LABEL: f2:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r7, lr}
-; CHECK-NEXT:  push   {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:  .pad   #4
-; CHECK-NEXT:  sub    sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; ...
-; CHECK:       add    sp, #4
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r7, lr}
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  mrs    r12, control
-; ...
-; CHECK:       bxns    lr
 
 declare dso_local i32 @g(i32) local_unnamed_addr
 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
index 4dfac252e2314c..9a8bba47f33ad6 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
@@ -1,11 +1,33 @@
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabi"
 
 @p = hidden local_unnamed_addr global ptr null, align 4
 
 define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r5, r2
+; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    movw r1, :lower16:p
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    movt r1, :upper16:p
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    ldr r4, [r1]
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    blx r4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %call = tail call i32 @g(i32 %a) #0
   %0 = load ptr, ptr @p, align 4
@@ -13,19 +35,6 @@ entry:
   ret i32 %call1
 }
 
-; CHECK1-LABEL: f
-; ...
-; CHECK1:       aut r12, lr, sp
-; CHECK1-NOT:   bx r12
-
-; CHECK2-LABEL: f
-; ...
-; CHECK2:       blx r4
-; CHECK2-NEXT:  ldr r12, [sp], #4
-; CHECK2-NEXT:  pop.w {r4, r5, r6, r7, lr}
-; CHECK2-NEXT:  aut r12, lr, sp
-; CHECK2-NEXT:  bx lr
-
 declare dso_local i32 @g(i32) local_unnamed_addr #0
 
 attributes #0 = { nounwind "sign-return-address"="non-leaf"}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index 1b13e06546f152..ad94b7be8b2a60 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -21,12 +22,50 @@ target triple = "thumbv7m-arm-none-eabi"
 ; }
 
 define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: h:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %add = add nsw i32 %b, %a
   ret i32 %add
 }
 
 define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r6, -8
+; CHECK-NEXT:    .cfi_offset r5, -12
+; CHECK-NEXT:    .cfi_offset r4, -16
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB1_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB1_3: @ %return
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -48,34 +87,39 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: f:
-; ...
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r4, r5, r6, lr}
-; CHECK-NEXT:  push   {r4, r5, r6, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r6, -8
-; CHECK-NEXT:  .cfi_offset r5, -12
-; CHECK-NEXT:  .cfi_offset r4, -16
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 20
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:  .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 24
-; ...
-; CHECK:        bl  OUTLINED_FUNCTION_0
-; ...
-; CHECK:        add    sp, #4
-; CHECK-NEXT:   ldr    r12, [sp], #4
-; CHECK-NEXT:   pop.w  {r4, r5, r6, lr}
-; CHECK-NEXT:   aut    r12, lr, sp
-; CHECK-NEXT:   bx     lr
-
-
 define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r6, -8
+; CHECK-NEXT:    .cfi_offset r5, -12
+; CHECK-NEXT:    .cfi_offset r4, -16
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    b .LBB2_3
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB2_3: @ %return
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -96,30 +140,6 @@ return:                                           ; preds = %entry, %if.end
   %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ]
   ret i32 %retval.0
 }
-; CHECK-LABEL: g:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r4, r5, r6, lr}
-; CHECK-NEXT:  push   {r4, r5, r6, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r6, -8
-; CHECK-NEXT:  .cfi_offset r5, -12
-; CHECK-NEXT:  .cfi_offset r4, -16
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 20
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:  .pad   #4
-; CHECK-NEXT:  sub    sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 24
-; ...
-; CHECK:        bl  OUTLINED_FUNCTION_0
-; ...
-; CHECK:       add    sp, #4
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r4, r5, r6, lr}
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  bx     lr
 
 ; CHECK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK:       pac    r12, lr, sp
@@ -158,7 +178,7 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable "
 ; UNWIND-LABEL: FunctionAddress: 0x5C
 ; UNWIND:       0xB4      ; pop ra_auth_code
 ; UNWIND:       0x84 0x00 ; pop {lr}
- 
+
 ; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0
 ; UNWIND-LABEL: 00000005 {{.*}} f
 ; UNWIND-LABEL: 00000031 {{.*}} g
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
index 38c23977b623f9..c0b45c0f90eb25 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -27,6 +28,44 @@ target triple = "thumbv7m-arm-none-eabi"
 @_ZTIi = external dso_local constant ptr
 
 define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1hii:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    cmp.w r0, #-1
+; CHECK-NEXT:    ble .LBB0_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_2: @ %if.then
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bl __cxa_allocate_exception
+; CHECK-NEXT:    movs r1, #1
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    ldr r1, .LCPI0_0
+; CHECK-NEXT:    bl __cxa_throw
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long _ZTIi
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %if.then, label %if.end
@@ -42,31 +81,47 @@ if.end:                                           ; preds = %entry
   ret i32 %add
 }
 
-; CHECK-LABEL: _Z1hii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r7, lr}
-; CHECK-NEXT:    push    {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; ...
-; CHECK-NOT: pac
-; CHECK: aut
-; CHECK:     .cfi_endproc
-
 declare dso_local ptr @__cxa_allocate_exception(i32) local_unnamed_addr
 
 declare dso_local void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
 
 define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiiii:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r6, -8
+; CHECK-NEXT:    .cfi_offset r5, -12
+; CHECK-NEXT:    .cfi_offset r4, -16
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB1_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r1, r0, r6
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    adds r1, r4, r5
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB1_3: @ %return
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -85,35 +140,43 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: _Z1fiiii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r4, r5, r6, lr}
-; CHECK-NEXT:    push    {r4, r5, r6, lr}
+define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1giiii:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, -4
 ; CHECK-NEXT:    .cfi_offset r6, -8
 ; CHECK-NEXT:    .cfi_offset r5, -12
 ; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
 ; CHECK-NEXT:    .cfi_def_cfa_offset 20
 ; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; ...
-; CHECK:    bl	OUTLINED_FUNCTION_0
-; ...
-; CHECK:    add    sp, #4
-; CHECK-NEXT:    ldr    r12, [sp], #4
-; CHECK-NEXT:    pop.w    {r4, r5, r6, lr}
-; CHECK-NEXT:    aut    r12, lr, sp
-; CHECK-NEXT:    bx    lr
-
-
-
-define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r1, r0, r6
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    adds r1, r4, r5
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    b .LBB2_3
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB2_3: @ %return
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -132,33 +195,6 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: _Z1giiii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r4, r5, r6, lr}
-; CHECK-NEXT:    push    {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; ...
-; CHECK:    bl	OUTLINED_FUNCTION_0
-; ...
-; CHECK:    add    sp, #4
-; CHECK-NEXT:    ldr    r12, [sp], #4
-; CHECK-NEXT:    pop.w    {r4, r5, r6, lr}
-; CHECK-NEXT:    aut    r12, lr, sp
-; CHECK-NEXT:    bx    lr
-
-
 ; CHEK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK-NOT: pac
 ; CHECK-NOT: aut
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
index 5dce6752c065e1..012120d976810b 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -15,6 +16,42 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; }
 
 define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: _Z1fv:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r6, r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r4, -16
+; CHECK-NEXT:    .setfp r7, sp, #8
+; CHECK-NEXT:    add r7, sp, #8
+; CHECK-NEXT:    .cfi_def_cfa r7, 8
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    bfc r4, #0, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bl _Z1giPi
+; CHECK-NEXT:    ldm.w sp, {r0, r1, r2, r3}
+; CHECK-NEXT:    sub.w r4, r7, #12
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    add r0, r3
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r6, r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %a = alloca [4 x i32], align 32
   %call = call i32 @_Z1giPi(i32 4, ptr nonnull %a)
@@ -31,29 +68,6 @@ entry:
   ret i32 %add.3
 }
 
-; CHECK-LABEL: _Z1fv:
-; CHECK:      pac     r12, lr, sp
-; CHECK:      .save   {r4, r6, r7, lr}
-; CHECK-NEXT: push    {r4, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .setfp  r7, sp, #8
-; CHECK-NEXT: add     r7, sp, #8
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save   {ra_auth_code}
-; CHECK-NEXT: str     r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad    #44
-; CHECK-NEXT: sub     sp, #44
-; CHECK:      ldr     r12, [sp], #4
-; CHECK-NEXT: pop.w   {r4, r6, r7, lr}
-; CHECK-NEXT: aut     r12, lr, sp
-; CHECK-NEXT: bx      lr
-
-
 declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
 
 !llvm.module.flags = !{!0, !1, !2}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index d027c9e8c7b548..63adc78fe849c3 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -5,6 +6,54 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 %"struct.std::__va_list" = type { ptr }
 
 define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset r7, -20
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -24
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 28
+; CHECK-NEXT:    add.w r12, sp, #16
+; CHECK-NEXT:    cmp r0, #1
+; CHECK-NEXT:    stm.w r12, {r1, r2, r3}
+; CHECK-NEXT:    add r1, sp, #16
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    blt .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.lr.ph
+; CHECK-NEXT:    ldr r1, [sp]
+; CHECK-NEXT:    dls lr, r0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:  .LBB0_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    ldr r2, [r1, #-4]
+; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    le lr, .LBB0_2
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:  .LBB0_4: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    add sp, #12
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %ap = alloca %"struct.std::__va_list", align 4
   call void @llvm.va_start(ptr nonnull %ap)
@@ -33,34 +82,6 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: _Z1fiz:
-; CHECK:      pac    r12, lr, sp
-; CHECK-NEXT: .pad    #12
-; CHECK-NEXT: sub    sp, #12
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT: push    {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .save    {ra_auth_code}
-; CHECK-NEXT: str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .pad    #4
-; CHECK-NEXT: sub    sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; ...
-; CHECK:      add.w r[[N:[0-9]*]], sp, #16
-; CHECK:      stm.w r[[N]], {r1, r2, r3}
-; ...
-; CHECK:      add    sp, #4
-; CHECK-NEXT: ldr    r12, [sp], #4
-; CHECK-NEXT: pop.w    {r7, lr}
-; CHECK-NEXT: add    sp, #12
-; CHECK-NEXT: aut    r12, lr, sp
-; CHECK-NEXT: bx    lr
-
 declare void @llvm.va_start(ptr) #1
 declare void @llvm.va_end(ptr) #1
 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
index 8019cd5b6109eb..38b5b7a16e01bd 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -19,6 +20,54 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 %"struct.std::__va_list" = type { ptr }
 
 define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 28
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset r7, -20
+; CHECK-NEXT:    .cfi_offset r5, -24
+; CHECK-NEXT:    .cfi_offset r4, -28
+; CHECK-NEXT:    .save {ra_auth_code}
+; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -32
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r4, #1
+; CHECK-NEXT:    stm r0!, {r1, r2, r3}
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    str r0, [sp, #4]
+; CHECK-NEXT:    blt .LBB0_2
+; CHECK-NEXT:  .LBB0_1: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    adds r1, r0, #4
+; CHECK-NEXT:    str r1, [sp, #4]
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    bl _Z1gi
+; CHECK-NEXT:    add r5, r0
+; CHECK-NEXT:    subs r4, #1
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:  .LBB0_2: @ %for.cond.cleanup
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    ldr r12, [sp], #4
+; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-NEXT:    add sp, #12
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %ap = alloca %"struct.std::__va_list", align 4
   call void @llvm.va_start(ptr nonnull %ap)
@@ -47,36 +96,6 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: _Z1fiz:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .pad   #12
-; CHECK-NEXT:  sub    sp, #12
-; CHECK-NEXT:  .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .save   {r4, r5, r7, lr}
-; CHECK-NEXT:  push    {r4, r5, r7, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 28
-; CHECK-NEXT:  .cfi_offset lr, -16
-; CHECK-NEXT:  .cfi_offset r7, -20
-; CHECK-NEXT:  .cfi_offset r5, -24
-; CHECK-NEXT:  .cfi_offset r4, -28
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset ra_auth_code, -32
-; CHECK-NEXT:  .pad   #8
-; CHECK-NEXT:  sub    sp, #8
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; ...
-; CHECK:       add    r[[N:[0-9]*]], sp, #28
-; CHECK:       stm    r[[N]]!, {r1, r2, r3}
-; ...
-; CHECK:       add    sp, #8
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r4, r5, r7, lr}
-; CHECK-NEXT:  add    sp, #12
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  bx     lr
-
 declare void @llvm.va_start(ptr) #1
 declare void @llvm.va_end(ptr) #1
 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index c1d17a7587be05..ccab35b7331141 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -14,6 +15,92 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; }
 
 define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset r7, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .cfi_def_cfa r7, 8
+; CHECK-NEXT:    .save {r8, r9, ra_auth_code}
+; CHECK-NEXT:    push.w {r8, r9, r12}
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -24
+; CHECK-NEXT:    .cfi_offset r9, -28
+; CHECK-NEXT:    .cfi_offset r8, -32
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    movs r0, #7
+; CHECK-NEXT:    add.w r0, r0, r5, lsl #2
+; CHECK-NEXT:    bic r0, r0, #7
+; CHECK-NEXT:    sub.w r4, sp, r0
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    cmp r5, #1
+; CHECK-NEXT:    blt .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT:    subs r0, r5, #1
+; CHECK-NEXT:    and r12, r5, #3
+; CHECK-NEXT:    cmp r0, #3
+; CHECK-NEXT:    bhs .LBB0_4
+; CHECK-NEXT:  @ %bb.2:
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_4: @ %for.body.preheader.new
+; CHECK-NEXT:    bic r0, r5, #3
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    subs r0, #4
+; CHECK-NEXT:    sub.w r3, r4, #16
+; CHECK-NEXT:    add.w lr, r2, r0, lsr #2
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:  .LBB0_5: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r5, [r3, #16]!
+; CHECK-NEXT:    adds r2, #4
+; CHECK-NEXT:    add r0, r5
+; CHECK-NEXT:    ldrd r5, r1, [r3, #4]
+; CHECK-NEXT:    ldr r6, [r3, #12]
+; CHECK-NEXT:    add r0, r5
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    add r0, r6
+; CHECK-NEXT:    le lr, .LBB0_5
+; CHECK-NEXT:  .LBB0_6: @ %for.cond.cleanup.loopexit.unr-lcssa
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    beq .LBB0_9
+; CHECK-NEXT:  @ %bb.7: @ %for.body.epil
+; CHECK-NEXT:    ldr.w r3, [r4, r2, lsl #2]
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    add r0, r3
+; CHECK-NEXT:    beq .LBB0_9
+; CHECK-NEXT:  @ %bb.8: @ %for.body.epil.1
+; CHECK-NEXT:    add.w r2, r4, r2, lsl #2
+; CHECK-NEXT:    cmp.w r12, #2
+; CHECK-NEXT:    ldr r1, [r2, #4]
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    ldrne r1, [r2, #8]
+; CHECK-NEXT:    addne r0, r1
+; CHECK-NEXT:  .LBB0_9: @ %for.cond.cleanup
+; CHECK-NEXT:    sub.w r4, r7, #24
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop.w {r8, r9, r12}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %vla = alloca i32, i32 %n, align 4
   %call = call i32 @g(i32 %n, ptr nonnull %vla) #0
@@ -88,32 +175,6 @@ for.body.epil.2:                                  ; preds = %for.body.epil.1
   br label %for.cond.cleanup
 }
 
-; CHECK-LABEL: f:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT: .save   {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push    {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r5, -16
-; CHECK-NEXT: .cfi_offset r4, -20
-; CHECK-NEXT: .setfp r7, sp, #12
-; CHECK-NEXT: add    r7, sp, #12
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save    {r8, r9, ra_auth_code}
-; CHECK-NEXT: push.w   {r8, r9, r12}
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .cfi_offset r9, -28
-; CHECK-NEXT: .cfi_offset r8, -32
-; ...
-; CHECK:      sub.w  r[[N:[0-9]*]], r7, #24
-; CHECK-NEXT: mov    sp, r[[N]]
-; CHECK-NEXT: pop.w  {r8, r9, r12}
-; CHECK-NEXT: pop.w  {r4, r5, r6, r7, lr}
-; CHECK-NEXT: aut    r12, lr, sp
-; CHECK-NEXT: bx     lr
-
 declare dso_local i32 @g(i32, ptr) local_unnamed_addr #0
 
 attributes #0 = { nounwind "sign-return-address"="non-leaf"}

>From 51dc9bd80674db96a7e0a345687f784c6891c182 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 13:48:55 +0100
Subject: [PATCH 2/2] [ARM] Optimise non-ABI frame pointers

With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting the push to ensure that the frame pointer and link register
are adjacent on the stack. This saves two instructions per function:
one in the prologue and one in the epilogue.
---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      |  11 +
 llvm/lib/Target/ARM/ARMSubtarget.cpp          |  30 +-
 llvm/lib/Target/ARM/ARMSubtarget.h            |   4 +
 llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll |  22 +-
 llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll    |  72 ++-
 .../CodeGen/Thumb2/pacbti-m-frame-chain.ll    | 434 ++++++++++++++----
 .../Thumb2/pacbti-m-indirect-tail-call.ll     |   9 +-
 .../CodeGen/Thumb2/pacbti-m-outliner-3.ll     |  62 +--
 .../CodeGen/Thumb2/pacbti-m-outliner-4.ll     |  98 ++--
 .../test/CodeGen/Thumb2/pacbti-m-overalign.ll |  30 +-
 .../test/CodeGen/Thumb2/pacbti-m-stack-arg.ll |   9 +-
 .../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll |  16 +-
 .../test/CodeGen/Thumb2/pacbti-m-varargs-2.ll |  30 +-
 llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll      |  29 +-
 14 files changed, 523 insertions(+), 333 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 57e2d5525a1aaa..a2fb410b8cde52 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2998,6 +2998,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
       // on the stack.
       CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
       break;
+    case ARMSubtarget::NoSplit:
+      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
+             "ABI-required frame pointers need a CSR split when signing return "
+             "address.");
+      CSI.insert(find_if(CSI,
+                         [=](const auto &CS) {
+                           Register Reg = CS.getReg();
+                           return Reg != ARM::LR;
+                         }),
+                 CalleeSavedInfo(ARM::R12));
+      break;
     default:
       llvm_unreachable("Unexpected CSR split with return address signing");
     }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 9adfb1fab5f084..e3978232540b8c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
   const std::vector<CalleeSavedInfo> CSI =
       MF.getFrameInfo().getCalleeSavedInfo();
 
-  // Returns SplitR7 if the frame setup must be split into two separate pushes
-  // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
-  // always required on Thumb1-only targets, as the push and pop instructions
-  // can't access the high registers. This is also required when R7 is the frame
-  // pointer and frame pointer elimiination is disabled, or branch signing is
-  // enabled and AAPCS is disabled.
-  if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-       !createAAPCSFrameChain()) ||
-      (getFramePointerReg() == ARM::R7 &&
-       MF.getTarget().Options.DisableFramePointerElim(MF)) ||
-      isThumb1Only())
+  // Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
+  // cannot use high registers except for lr.
+  if (isThumb1Only())
+    return SplitR7;
+
+  // If R7 is the frame pointer and an ABI-compliant frame record is required,
+  // we must split at R7 so that the previous frame pointer (R7) and return
+  // address (LR) are adjacent on the stack.
+  if (getFramePointerReg() == ARM::R7 &&
+      MF.getTarget().Options.DisableFramePointerElim(MF))
     return SplitR7;
 
   // Returns SplitR11WindowsSEH when the stack pointer needs to be
@@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
       (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
     return SplitR11WindowsSEH;
 
-  // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
-  // other in the list of callee saved registers in a frame, and branch
-  // signing is enabled.
+  // Returns SplitR11AAPCSSignRA when R11 is the frame pointer and frame
+  // pointer elimination is disabled, requiring R11 and LR to be adjacent on the
+  // stack, and branch signing is enabled, requiring R12 to be on the stack.
   if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-      getFramePointerReg() == ARM::R11)
+      getFramePointerReg() == ARM::R11 &&
+      MF.getTarget().Options.DisableFramePointerElim(MF))
     return SplitR11AAPCSSignRA;
   return NoSplit;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 214c5f1b45e556..2f7af05a259f8f 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     /// push {r0-r7, lr}
     /// push {r8-r12}
     /// vpush {d8-d15}
+    /// Note that Thumb1 changes this layout when the frame pointer is R11,
+    /// using a longer sequence of instructions because R11 can't be used by a
+    /// Thumb1 push instruction. This doesn't currently have a separate enum
+    /// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
     SplitR7,
 
     /// When the stack frame size is not known (because of variable-sized
diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
index c2a2ed2d0c8e8a..a0e6f9bf9b30d9 100644
--- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
@@ -11,15 +11,12 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pacbti r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -27,8 +24,7 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
 ; CHECK-NEXT:    bl _Z1gi
 ; CHECK-NEXT:    subs r0, #1
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -42,6 +38,8 @@ declare dso_local i32 @_Z1gi(i32)
 
 ; UNWIND-LABEL: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
+
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
index 64c5a6c7030145..31f8ecddcb986c 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
@@ -22,15 +22,12 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -38,8 +35,7 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
 ; CHECK-NEXT:    bl g
 ; CHECK-NEXT:    adds r0, #1
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -56,20 +52,16 @@ define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
 ; CHECK-NEXT:    pac r12, lr, sp
 ; CHECK-NEXT:    vstr fpcxtns, [sp, #-4]!
 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset lr, -8
-; CHECK-NEXT:    .cfi_offset r7, -12
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -16
+; CHECK-NEXT:    .cfi_offset lr, -8
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .cfi_offset r7, -16
 ; CHECK-NEXT:    subs r0, #1
 ; CHECK-NEXT:    bl g
 ; CHECK-NEXT:    adds r0, #1
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
 ; CHECK-NEXT:    vldr fpcxtns, [sp], #4
 ; CHECK-NEXT:    aut r12, lr, sp
@@ -87,15 +79,12 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -103,8 +92,7 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
 ; CHECK-NEXT:    bl g
 ; CHECK-NEXT:    adds r0, #1
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    mrs r12, control
 ; CHECK-NEXT:    tst.w r12, #8
@@ -149,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe
 
 ; UNWIND-LABEL: FunctionAddress: 0x0
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
+
 
-; UNWIND-LABEL: FunctionAddress: 0x24
-; UNWIND:       0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
+; UNWIND-LABEL: FunctionAddress: 0x1E
+; UNWIND:       0x80 0x08 ; pop {r7}
+; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x54
+; UNWIND-LABEL: FunctionAddress: 0x48
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
 ; UNWIND-LABEL: 00000001 {{.*}} f0
-; UNWIND-LABEL: 00000025 {{.*}} f1
-; UNWIND-LABEL: 00000055 {{.*}} f2
+; UNWIND-LABEL: 0000001f {{.*}} f1
+; UNWIND-LABEL: 00000049 {{.*}} f2
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
index 8bcf87130c5400..e9c7f2236c0ffc 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -1,32 +1,56 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none                          | FileCheck %s --check-prefix=R7
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all                           | FileCheck %s --check-prefix=R7-ABI
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all  -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-ABI
 
 ; int test1() {
 ;     return 0;
 ; }
 define i32 @test1() "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test1:
-; CHECK:         .cfi_sections .debug_frame
-; CHECK-NEXT:    .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 4
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset lr, -8
-; CHECK-NEXT:    .cfi_offset r11, -12
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    movs r0, #0
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test1:
+; R7:         .cfi_sections .debug_frame
+; R7-NEXT:    .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    movs r0, #0
+; R7-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test1:
+; R7-ABI:         .cfi_sections .debug_frame
+; R7-ABI-NEXT:    .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    movs r0, #0
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test1:
+; R11:         .cfi_sections .debug_frame
+; R11-NEXT:    .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    movs r0, #0
+; R11-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test1:
+; R11-ABI:         .cfi_sections .debug_frame
+; R11-ABI-NEXT:    .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {ra_auth_code}
+; R11-ABI-NEXT:    str r12, [sp, #-4]!
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 4
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 12
+; R11-ABI-NEXT:    .cfi_offset lr, -8
+; R11-ABI-NEXT:    .cfi_offset r11, -12
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    movs r0, #0
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    ldr r12, [sp], #4
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
     ret i32 0
 }
@@ -36,37 +60,127 @@ entry:
 ;   bar(a);
 ; }
 define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test2:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r7, ra_auth_code}
-; CHECK-NEXT:    push.w {r4, r7, r12}
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r4, -12
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset lr, -16
-; CHECK-NEXT:    .cfi_offset r11, -20
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    movs r1, #7
-; CHECK-NEXT:    add.w r0, r1, r0, lsl #2
-; CHECK-NEXT:    bic r0, r0, #7
-; CHECK-NEXT:    sub.w r0, sp, r0
-; CHECK-NEXT:    mov sp, r0
-; CHECK-NEXT:    bl take_ptr
-; CHECK-NEXT:    mov sp, r11
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    pop.w {r4, r7, r12}
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test2:
+; R7:         .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    pac r12, lr, sp
+; R7-NEXT:    .save {r4, r6, r7, ra_auth_code, lr}
+; R7-NEXT:    push.w {r4, r6, r7, r12, lr}
+; R7-NEXT:    .cfi_def_cfa_offset 20
+; R7-NEXT:    .cfi_offset lr, -4
+; R7-NEXT:    .cfi_offset ra_auth_code, -8
+; R7-NEXT:    .cfi_offset r7, -12
+; R7-NEXT:    .cfi_offset r6, -16
+; R7-NEXT:    .cfi_offset r4, -20
+; R7-NEXT:    .setfp r7, sp, #8
+; R7-NEXT:    add r7, sp, #8
+; R7-NEXT:    .cfi_def_cfa r7, 12
+; R7-NEXT:    .pad #4
+; R7-NEXT:    sub sp, #4
+; R7-NEXT:    movs r1, #7
+; R7-NEXT:    add.w r0, r1, r0, lsl #2
+; R7-NEXT:    bic r0, r0, #7
+; R7-NEXT:    sub.w r0, sp, r0
+; R7-NEXT:    mov sp, r0
+; R7-NEXT:    bl take_ptr
+; R7-NEXT:    sub.w r4, r7, #8
+; R7-NEXT:    mov sp, r4
+; R7-NEXT:    pop.w {r4, r6, r7, r12, lr}
+; R7-NEXT:    aut r12, lr, sp
+; R7-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test2:
+; R7-ABI:         .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    pac r12, lr, sp
+; R7-ABI-NEXT:    .save {r4, r6, r7, lr}
+; R7-ABI-NEXT:    push {r4, r6, r7, lr}
+; R7-ABI-NEXT:    .cfi_def_cfa_offset 16
+; R7-ABI-NEXT:    .cfi_offset lr, -4
+; R7-ABI-NEXT:    .cfi_offset r7, -8
+; R7-ABI-NEXT:    .cfi_offset r6, -12
+; R7-ABI-NEXT:    .cfi_offset r4, -16
+; R7-ABI-NEXT:    .setfp r7, sp, #8
+; R7-ABI-NEXT:    add r7, sp, #8
+; R7-ABI-NEXT:    .cfi_def_cfa r7, 8
+; R7-ABI-NEXT:    .save {ra_auth_code}
+; R7-ABI-NEXT:    str r12, [sp, #-4]!
+; R7-ABI-NEXT:    .cfi_offset ra_auth_code, -20
+; R7-ABI-NEXT:    .pad #4
+; R7-ABI-NEXT:    sub sp, #4
+; R7-ABI-NEXT:    movs r1, #7
+; R7-ABI-NEXT:    add.w r0, r1, r0, lsl #2
+; R7-ABI-NEXT:    bic r0, r0, #7
+; R7-ABI-NEXT:    sub.w r0, sp, r0
+; R7-ABI-NEXT:    mov sp, r0
+; R7-ABI-NEXT:    bl take_ptr
+; R7-ABI-NEXT:    sub.w r4, r7, #12
+; R7-ABI-NEXT:    mov sp, r4
+; R7-ABI-NEXT:    ldr r12, [sp], #4
+; R7-ABI-NEXT:    pop.w {r4, r6, r7, lr}
+; R7-ABI-NEXT:    aut r12, lr, sp
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test2:
+; R11:         .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    pac r12, lr, sp
+; R11-NEXT:    .save {r4, r7, r11, ra_auth_code, lr}
+; R11-NEXT:    push.w {r4, r7, r11, r12, lr}
+; R11-NEXT:    .cfi_def_cfa_offset 20
+; R11-NEXT:    .cfi_offset lr, -4
+; R11-NEXT:    .cfi_offset ra_auth_code, -8
+; R11-NEXT:    .cfi_offset r11, -12
+; R11-NEXT:    .cfi_offset r7, -16
+; R11-NEXT:    .cfi_offset r4, -20
+; R11-NEXT:    .setfp r11, sp, #8
+; R11-NEXT:    add.w r11, sp, #8
+; R11-NEXT:    .cfi_def_cfa r11, 12
+; R11-NEXT:    .pad #4
+; R11-NEXT:    sub sp, #4
+; R11-NEXT:    movs r1, #7
+; R11-NEXT:    add.w r0, r1, r0, lsl #2
+; R11-NEXT:    bic r0, r0, #7
+; R11-NEXT:    sub.w r0, sp, r0
+; R11-NEXT:    mov sp, r0
+; R11-NEXT:    bl take_ptr
+; R11-NEXT:    sub.w r4, r11, #8
+; R11-NEXT:    mov sp, r4
+; R11-NEXT:    pop.w {r4, r7, r11, r12, lr}
+; R11-NEXT:    aut r12, lr, sp
+; R11-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test2:
+; R11-ABI:         .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {r4, r7, ra_auth_code}
+; R11-ABI-NEXT:    push.w {r4, r7, r12}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 12
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .cfi_offset r7, -8
+; R11-ABI-NEXT:    .cfi_offset r4, -12
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R11-ABI-NEXT:    .cfi_offset lr, -16
+; R11-ABI-NEXT:    .cfi_offset r11, -20
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    .pad #4
+; R11-ABI-NEXT:    sub sp, #4
+; R11-ABI-NEXT:    movs r1, #7
+; R11-ABI-NEXT:    add.w r0, r1, r0, lsl #2
+; R11-ABI-NEXT:    bic r0, r0, #7
+; R11-ABI-NEXT:    sub.w r0, sp, r0
+; R11-ABI-NEXT:    mov sp, r0
+; R11-ABI-NEXT:    bl take_ptr
+; R11-ABI-NEXT:    mov sp, r11
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    pop.w {r4, r7, r12}
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
   %vla = alloca i32, i32 %n, align 4
   call void @take_ptr(ptr noundef nonnull %vla)
@@ -81,49 +195,175 @@ entry:
 ;     knr();
 ; }
 define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test3:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, r7, ra_auth_code}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r6, -12
-; CHECK-NEXT:    .cfi_offset r5, -16
-; CHECK-NEXT:    .cfi_offset r4, -20
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 28
-; CHECK-NEXT:    .cfi_offset lr, -24
-; CHECK-NEXT:    .cfi_offset r11, -28
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r5, r2
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    blne knr
-; CHECK-NEXT:    adds r0, r5, #7
-; CHECK-NEXT:    bic r0, r0, #7
-; CHECK-NEXT:    sub.w r0, sp, r0
-; CHECK-NEXT:    mov sp, r0
-; CHECK-NEXT:    bl take_ptr
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    movs r1, #0
-; CHECK-NEXT:    bl __aeabi_fcmpeq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bleq knr
-; CHECK-NEXT:    mov sp, r11
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test3:
+; R7:         .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    pac r12, lr, sp
+; R7-NEXT:    .save {r4, r5, r6, r7, r8, ra_auth_code, lr}
+; R7-NEXT:    push.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT:    .cfi_def_cfa_offset 28
+; R7-NEXT:    .cfi_offset lr, -4
+; R7-NEXT:    .cfi_offset ra_auth_code, -8
+; R7-NEXT:    .cfi_offset r8, -12
+; R7-NEXT:    .cfi_offset r7, -16
+; R7-NEXT:    .cfi_offset r6, -20
+; R7-NEXT:    .cfi_offset r5, -24
+; R7-NEXT:    .cfi_offset r4, -28
+; R7-NEXT:    .setfp r7, sp, #12
+; R7-NEXT:    add r7, sp, #12
+; R7-NEXT:    .cfi_def_cfa r7, 16
+; R7-NEXT:    .pad #4
+; R7-NEXT:    sub sp, #4
+; R7-NEXT:    cmp r0, #0
+; R7-NEXT:    mov r5, r2
+; R7-NEXT:    mov r4, r1
+; R7-NEXT:    it ne
+; R7-NEXT:    blne knr
+; R7-NEXT:    adds r0, r5, #7
+; R7-NEXT:    bic r0, r0, #7
+; R7-NEXT:    sub.w r0, sp, r0
+; R7-NEXT:    mov sp, r0
+; R7-NEXT:    bl take_ptr
+; R7-NEXT:    mov r0, r4
+; R7-NEXT:    movs r1, #0
+; R7-NEXT:    bl __aeabi_fcmpeq
+; R7-NEXT:    cmp r0, #0
+; R7-NEXT:    it eq
+; R7-NEXT:    bleq knr
+; R7-NEXT:    sub.w r4, r7, #12
+; R7-NEXT:    mov sp, r4
+; R7-NEXT:    pop.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT:    aut r12, lr, sp
+; R7-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test3:
+; R7-ABI:         .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    pac r12, lr, sp
+; R7-ABI-NEXT:    .save {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    push {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R7-ABI-NEXT:    .cfi_offset lr, -4
+; R7-ABI-NEXT:    .cfi_offset r7, -8
+; R7-ABI-NEXT:    .cfi_offset r6, -12
+; R7-ABI-NEXT:    .cfi_offset r5, -16
+; R7-ABI-NEXT:    .cfi_offset r4, -20
+; R7-ABI-NEXT:    .setfp r7, sp, #12
+; R7-ABI-NEXT:    add r7, sp, #12
+; R7-ABI-NEXT:    .cfi_def_cfa r7, 8
+; R7-ABI-NEXT:    .save {r8, ra_auth_code}
+; R7-ABI-NEXT:    push.w {r8, r12}
+; R7-ABI-NEXT:    .cfi_offset ra_auth_code, -24
+; R7-ABI-NEXT:    .cfi_offset r8, -28
+; R7-ABI-NEXT:    .pad #4
+; R7-ABI-NEXT:    sub sp, #4
+; R7-ABI-NEXT:    cmp r0, #0
+; R7-ABI-NEXT:    mov r5, r2
+; R7-ABI-NEXT:    mov r4, r1
+; R7-ABI-NEXT:    it ne
+; R7-ABI-NEXT:    blne knr
+; R7-ABI-NEXT:    adds r0, r5, #7
+; R7-ABI-NEXT:    bic r0, r0, #7
+; R7-ABI-NEXT:    sub.w r0, sp, r0
+; R7-ABI-NEXT:    mov sp, r0
+; R7-ABI-NEXT:    bl take_ptr
+; R7-ABI-NEXT:    mov r0, r4
+; R7-ABI-NEXT:    movs r1, #0
+; R7-ABI-NEXT:    bl __aeabi_fcmpeq
+; R7-ABI-NEXT:    cmp r0, #0
+; R7-ABI-NEXT:    it eq
+; R7-ABI-NEXT:    bleq knr
+; R7-ABI-NEXT:    sub.w r4, r7, #20
+; R7-ABI-NEXT:    mov sp, r4
+; R7-ABI-NEXT:    pop.w {r8, r12}
+; R7-ABI-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    aut r12, lr, sp
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test3:
+; R11:         .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    pac r12, lr, sp
+; R11-NEXT:    .save {r4, r5, r6, r7, r11, ra_auth_code, lr}
+; R11-NEXT:    push.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT:    .cfi_def_cfa_offset 28
+; R11-NEXT:    .cfi_offset lr, -4
+; R11-NEXT:    .cfi_offset ra_auth_code, -8
+; R11-NEXT:    .cfi_offset r11, -12
+; R11-NEXT:    .cfi_offset r7, -16
+; R11-NEXT:    .cfi_offset r6, -20
+; R11-NEXT:    .cfi_offset r5, -24
+; R11-NEXT:    .cfi_offset r4, -28
+; R11-NEXT:    .setfp r11, sp, #16
+; R11-NEXT:    add.w r11, sp, #16
+; R11-NEXT:    .cfi_def_cfa r11, 12
+; R11-NEXT:    .pad #4
+; R11-NEXT:    sub sp, #4
+; R11-NEXT:    cmp r0, #0
+; R11-NEXT:    mov r5, r2
+; R11-NEXT:    mov r4, r1
+; R11-NEXT:    it ne
+; R11-NEXT:    blne knr
+; R11-NEXT:    adds r0, r5, #7
+; R11-NEXT:    bic r0, r0, #7
+; R11-NEXT:    sub.w r0, sp, r0
+; R11-NEXT:    mov sp, r0
+; R11-NEXT:    bl take_ptr
+; R11-NEXT:    mov r0, r4
+; R11-NEXT:    movs r1, #0
+; R11-NEXT:    bl __aeabi_fcmpeq
+; R11-NEXT:    cmp r0, #0
+; R11-NEXT:    it eq
+; R11-NEXT:    bleq knr
+; R11-NEXT:    sub.w r4, r11, #16
+; R11-NEXT:    mov sp, r4
+; R11-NEXT:    pop.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT:    aut r12, lr, sp
+; R11-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test3:
+; R11-ABI:         .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {r4, r5, r6, r7, ra_auth_code}
+; R11-ABI-NEXT:    push.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .cfi_offset r7, -8
+; R11-ABI-NEXT:    .cfi_offset r6, -12
+; R11-ABI-NEXT:    .cfi_offset r5, -16
+; R11-ABI-NEXT:    .cfi_offset r4, -20
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 28
+; R11-ABI-NEXT:    .cfi_offset lr, -24
+; R11-ABI-NEXT:    .cfi_offset r11, -28
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    .pad #4
+; R11-ABI-NEXT:    sub sp, #4
+; R11-ABI-NEXT:    cmp r0, #0
+; R11-ABI-NEXT:    mov r5, r2
+; R11-ABI-NEXT:    mov r4, r1
+; R11-ABI-NEXT:    it ne
+; R11-ABI-NEXT:    blne knr
+; R11-ABI-NEXT:    adds r0, r5, #7
+; R11-ABI-NEXT:    bic r0, r0, #7
+; R11-ABI-NEXT:    sub.w r0, sp, r0
+; R11-ABI-NEXT:    mov sp, r0
+; R11-ABI-NEXT:    bl take_ptr
+; R11-ABI-NEXT:    mov r0, r4
+; R11-ABI-NEXT:    movs r1, #0
+; R11-ABI-NEXT:    bl __aeabi_fcmpeq
+; R11-ABI-NEXT:    cmp r0, #0
+; R11-ABI-NEXT:    it eq
+; R11-ABI-NEXT:    bleq knr
+; R11-ABI-NEXT:    mov sp, r11
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    pop.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
   %tobool.not = icmp eq i32 %c, 0
   br i1 %tobool.not, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
index 9a8bba47f33ad6..615af15e8b5679 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
@@ -9,10 +9,8 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK-LABEL: f:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r4, r5, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r12, lr}
 ; CHECK-NEXT:    mov r7, r3
 ; CHECK-NEXT:    mov r5, r2
 ; CHECK-NEXT:    mov r6, r1
@@ -24,8 +22,7 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK-NEXT:    ldr r4, [r1]
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    blx r4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index ad94b7be8b2a60..d02d4b51d73b53 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -38,20 +38,15 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
 ; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    bmi .LBB1_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
@@ -61,9 +56,7 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB1_2:
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:  .LBB1_3: @ %return
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -92,20 +85,15 @@ define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
 ; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    bmi .LBB2_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
@@ -115,9 +103,7 @@ define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:  .LBB2_3: @ %return
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -167,18 +153,20 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable "
 
 ; UNWIND-LABEL: FunctionAddress: 0x4
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND-LABEL: FunctionAddress: 0x26
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND-LABEL: FunctionAddress: 0x48
 ; UNWIND:       0xB4      ; pop ra_auth_code
-; UNWIND:       0x84 0x00 ; pop {lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0
+; UNWIND-LABEL: 00000049 {{.*}} OUTLINED_FUNCTION_0
 ; UNWIND-LABEL: 00000005 {{.*}} f
-; UNWIND-LABEL: 00000031 {{.*}} g
+; UNWIND-LABEL: 00000027 {{.*}} g
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
index c0b45c0f90eb25..8777d517c4badc 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
@@ -33,25 +33,18 @@ define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
 ; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    push.w {r6, r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
 ; CHECK-NEXT:    cmp.w r0, #-1
 ; CHECK-NEXT:    ble .LBB0_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
 ; CHECK-NEXT:    add r0, r1
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r3, r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:  .LBB0_2: @ %if.then
@@ -90,20 +83,15 @@ define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
 ; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    bmi .LBB1_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
@@ -117,9 +105,7 @@ define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
 ; CHECK-NEXT:  .LBB1_2:
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:  .LBB1_3: @ %return
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -145,20 +131,15 @@ define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
 ; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    bmi .LBB2_2
 ; CHECK-NEXT:  @ %bb.1: @ %if.end
@@ -172,9 +153,7 @@ define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
 ; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:  .LBB2_3: @ %return
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -213,32 +192,31 @@ attributes #2 = { noreturn "sign-return-address"="non-leaf" }
 
 
 ; UNWIND-LABEL: FunctionAddress: 0x0
-; UNWIND:       Opcodes
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x3C
-; UNWIND:       Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x72
-; UNWIND:       Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0xA8
-; UNWIND:       Opcodes
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-LABEL: FunctionAddress: 0x88
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0xB0      ; finish
 
-; UNWIND: 000000a9 {{.*}} OUTLINED_FUNCTION_0
+; UNWIND: 00000089 {{.*}} OUTLINED_FUNCTION_0
-; UWNIND: 00000001 {{.*}} _Z1hii
-; UWNIND: 0000003d {{.*}} _Z1fiiii
-; UWNIND: 00000073 {{.*}} _Z1giiii
+; UNWIND: 00000001 {{.*}} _Z1hii
+; UNWIND: 00000031 {{.*}} _Z1fiiii
+; UNWIND: 0000005d {{.*}} _Z1giiii
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
index 012120d976810b..5354303a034d4e 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
@@ -21,19 +21,17 @@ define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r6, r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .save {r4, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r6, r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
 ; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r6, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .cfi_offset r6, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
 ; CHECK-NEXT:    .setfp r7, sp, #8
 ; CHECK-NEXT:    add r7, sp, #8
-; CHECK-NEXT:    .cfi_def_cfa r7, 8
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .cfi_def_cfa r7, 12
 ; CHECK-NEXT:    .pad #44
 ; CHECK-NEXT:    sub sp, #44
 ; CHECK-NEXT:    mov r4, sp
@@ -43,13 +41,12 @@ define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
 ; CHECK-NEXT:    movs r0, #4
 ; CHECK-NEXT:    bl _Z1giPi
 ; CHECK-NEXT:    ldm.w sp, {r0, r1, r2, r3}
-; CHECK-NEXT:    sub.w r4, r7, #12
+; CHECK-NEXT:    sub.w r4, r7, #8
 ; CHECK-NEXT:    add r0, r1
 ; CHECK-NEXT:    add r0, r2
 ; CHECK-NEXT:    add r0, r3
 ; CHECK-NEXT:    mov sp, r4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r6, r7, lr}
+; CHECK-NEXT:    pop.w {r4, r6, r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
@@ -78,6 +75,7 @@ declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
 
 ; UNWIND-LABEL:        FunctionAddress: 0x0
 ; UNWIND:          0x97      ; vsp = r7
-; UNWIND:          0x42      ; vsp = vsp - 12
-; UNWIND:          0xB4      ; pop ra_auth_code
-; UNWIND:          0x84 0x0D ; pop {r4, r6, r7, lr}
+; UNWIND-NEXT:     0x41      ; vsp = vsp - 8
+; UNWIND-NEXT:     0x80 0x0D ; pop {r4, r6, r7}
+; UNWIND-NEXT:     0xB4      ; pop ra_auth_code
+; UNWIND-NEXT:     0x84 0x00 ; pop {lr}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
index cae38b5e4a5a1b..c0c32de509b75d 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
@@ -19,17 +19,14 @@ define i32 @test_non_leaf(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %x) "s
 ; CHECK-LABEL: test_non_leaf:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    bl otherfn
 ; CHECK-NEXT:    ldr r0, [sp, #16]
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index 63adc78fe849c3..2b7abfabf7035a 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -14,15 +14,12 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
 ; CHECK-NEXT:    .pad #12
 ; CHECK-NEXT:    sub sp, #12
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset lr, -16
-; CHECK-NEXT:    .cfi_offset r7, -20
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -24
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .cfi_offset r7, -24
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .cfi_def_cfa_offset 28
@@ -49,8 +46,7 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:  .LBB0_4: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    add sp, #12
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
index 38b5b7a16e01bd..03b769f256bc28 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
@@ -28,17 +28,14 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
 ; CHECK-NEXT:    .pad #12
 ; CHECK-NEXT:    sub sp, #12
 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 28
-; CHECK-NEXT:    .cfi_offset lr, -16
-; CHECK-NEXT:    .cfi_offset r7, -20
-; CHECK-NEXT:    .cfi_offset r5, -24
-; CHECK-NEXT:    .cfi_offset r4, -28
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r4, r5, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r7, r12, lr}
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -32
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .cfi_offset r7, -24
+; CHECK-NEXT:    .cfi_offset r5, -28
+; CHECK-NEXT:    .cfi_offset r4, -32
 ; CHECK-NEXT:    .pad #8
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    .cfi_def_cfa_offset 40
@@ -63,8 +60,7 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB0_2: @ %for.cond.cleanup
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    add sp, #8
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-NEXT:    pop.w {r4, r5, r7, r12, lr}
 ; CHECK-NEXT:    add sp, #12
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
@@ -111,7 +107,9 @@ attributes #1 = { nounwind "sign-return-address"="non-leaf"}
 !2 = !{i32 8, !"sign-return-address-all", i32 0}
 
 ; UNWIND-LABEL: FunctionAddress
-; UNWIND:       0x01      ; vsp = vsp + 8
-; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x0B ; pop {r4, r5, r7, lr}
-; UNWIND-NEXT:  0x02      ; vsp = vsp + 12
+; UNWIND:      0x01      ; vsp = vsp + 8
+; UNWIND-NEXT: 0x80 0x0B ; pop {r4, r5, r7}
+; UNWIND-NEXT: 0xB4      ; pop ra_auth_code
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
+; UNWIND-NEXT: 0x02      ; vsp = vsp + 12
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index ccab35b7331141..5eb5990be7c118 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -20,22 +20,20 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r6, -12
-; CHECK-NEXT:    .cfi_offset r5, -16
-; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r9, -12
+; CHECK-NEXT:    .cfi_offset r8, -16
+; CHECK-NEXT:    .cfi_offset r7, -20
+; CHECK-NEXT:    .cfi_offset r6, -24
+; CHECK-NEXT:    .cfi_offset r5, -28
+; CHECK-NEXT:    .cfi_offset r4, -32
 ; CHECK-NEXT:    .setfp r7, sp, #12
 ; CHECK-NEXT:    add r7, sp, #12
-; CHECK-NEXT:    .cfi_def_cfa r7, 8
-; CHECK-NEXT:    .save {r8, r9, ra_auth_code}
-; CHECK-NEXT:    push.w {r8, r9, r12}
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -24
-; CHECK-NEXT:    .cfi_offset r9, -28
-; CHECK-NEXT:    .cfi_offset r8, -32
+; CHECK-NEXT:    .cfi_def_cfa r7, 20
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    movs r0, #7
 ; CHECK-NEXT:    add.w r0, r0, r5, lsl #2
@@ -95,10 +93,9 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
 ; CHECK-NEXT:    ldrne r1, [r2, #8]
 ; CHECK-NEXT:    addne r0, r1
 ; CHECK-NEXT:  .LBB0_9: @ %for.cond.cleanup
-; CHECK-NEXT:    sub.w r4, r7, #24
+; CHECK-NEXT:    sub.w r4, r7, #12
 ; CHECK-NEXT:    mov sp, r4
-; CHECK-NEXT:    pop.w {r8, r9, r12}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:



More information about the llvm-commits mailing list