[clang] [llvm] [AArch64] Fix argument passing in reserved registers for preserve_nonecc (PR #96259)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 20 17:54:16 PDT 2024


https://github.com/antangelo created https://github.com/llvm/llvm-project/pull/96259

This patch stops preserve_nonecc from passing arguments in registers that are reserved for other purposes. These registers include:
- X19, used by LLVM as the base pointer
- X15 on Windows, where it is used for stack allocation. It can still be used on Linux/Darwin

In addition, adjust the FrameLowering scratch register code to not assume X9 is available if the calling convention is preserve_nonecc. The code will then pick an unused register as scratch, and allow X9 to continue being used for argument passing.

>From 71de801f44be445f10a63d684b804d5a082ca7ce Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Thu, 20 Jun 2024 00:41:36 -0400
Subject: [PATCH] [AArch64] Fix argument passing in reserved registers for
 preserve_nonecc

Stop preserve_nonecc from passing arguments in registers that are
reserved for other purposes. These registers include:
- X19, used by LLVM as the base pointer
- X15 on Windows, where it is used for stack allocation. It can still be
  used on Linux/Darwin

In addition, adjust the FrameLowering scratch register code to not
assume X9 is available if the calling convention is preserve_nonecc.
The code will then pick an unused register as scratch, and allow X9 to
continue being used for argument passing.
---
 clang/include/clang/Basic/AttrDocs.td         |   7 +-
 .../AArch64/AArch64CallingConvention.td       |  22 +-
 .../Target/AArch64/AArch64FrameLowering.cpp   |   5 +-
 .../CodeGen/AArch64/preserve_nonecc_call.ll   | 302 ++++++++++++++----
 4 files changed, 267 insertions(+), 69 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 70d5dfa8aaf86..9a523c99902d8 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5672,9 +5672,10 @@ may be changed in the future.
   be used to pass function arguments. Floating-point registers (XMMs/YMMs) still
   follow the C calling convention.
 - On AArch64, only LR and FP are preserved by the callee.
-  Registers X19-X28, X0-X7, and X9-X15 are used to pass function arguments.
-  X8, X16-X18, SIMD and floating-point registers follow the AAPCS calling
-  convention.
+  Registers X20-X28, X0-X7, and X9-X14 are used to pass function arguments.
+  X8, X16-X19, SIMD and floating-point registers follow the AAPCS calling
+  convention. X15 is not available for argument passing on Windows, but is
+  used to pass arguments on other platforms.
   }];
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 941990c53c4a7..2f7e226fd09b2 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -500,23 +500,31 @@ def CC_AArch64_Preserve_None : CallingConv<[
     // - X8, used for sret
     // - X16/X17, used by the linker as IP0/IP1
     // - X18, the platform register
+    // - X19, the base pointer
     // - X29, the frame pointer
     // - X30, the link register
     // General registers are not preserved with the exception of
     // FP, LR, and X18
     // Non-volatile registers are used first, so functions may call
     // normal functions without saving and reloading arguments.
-    CCIfType<[i32], CCAssignToReg<[W19, W20, W21, W22, W23,
+    // X9 is assigned last as it is used in FrameLowering as the first
+    // choice for a scratch register.
+    CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
                                    W24, W25, W26, W27, W28,
                                    W0, W1, W2, W3, W4, W5,
-                                   W6, W7, W9, W10, W11,
-                                   W12, W13, W14, W15]>>,
-    CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23,
+                                   W6, W7, W10, W11,
+                                   W12, W13, W14, W9]>>,
+    CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
                                    X24, X25, X26, X27, X28,
                                    X0, X1, X2, X3, X4, X5,
-                                   X6, X7, X9, X10, X11,
-                                   X12, X13, X14, X15]>>,
-
+                                   X6, X7, X10, X11,
+                                   X12, X13, X14, X9]>>,
+
+    // Windows uses X15 for stack allocation
+    CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+        CCIfType<[i32], CCAssignToReg<[W15]>>>,
+    CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+        CCIfType<[i64], CCAssignToReg<[X15]>>>,
     CCDelegateTo<CC_AArch64_AAPCS>
 ]>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a99181373fc3c..8216fa7db822c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1036,7 +1036,10 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
   MachineFunction *MF = MBB->getParent();
 
   // If MBB is an entry block, use X9 as the scratch register
-  if (&MF->front() == MBB)
+  // preserve_none functions may be using X9 to pass arguments,
+  // so prefer to pick an available register below.
+  if (&MF->front() == MBB &&
+      MF->getFunction().getCallingConv() != CallingConv::PreserveNone)
     return AArch64::X9;
 
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
index 396005dfdd27e..9b9717c19321e 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefixes=DARWIN %s
+; RUN: llc -mtriple=aarch64-pc-windows < %s | FileCheck --check-prefixes=WIN %s
 
 ; This test checks various function call behaviors between preserve_none and
 ; normal calling conventions.
@@ -43,7 +44,7 @@ define void @caller1(ptr %a) {
 ; CHECK-NEXT:    .cfi_offset b13, -144
 ; CHECK-NEXT:    .cfi_offset b14, -152
 ; CHECK-NEXT:    .cfi_offset b15, -160
-; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x0
 ; CHECK-NEXT:    bl callee
 ; CHECK-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
@@ -90,7 +91,7 @@ define void @caller1(ptr %a) {
 ; DARWIN-NEXT:    .cfi_offset b13, -144
 ; DARWIN-NEXT:    .cfi_offset b14, -152
 ; DARWIN-NEXT:    .cfi_offset b15, -160
-; DARWIN-NEXT:    mov x19, x0
+; DARWIN-NEXT:    mov x20, x0
 ; DARWIN-NEXT:    bl _callee
 ; DARWIN-NEXT:    ldp x29, x30, [sp, #144] ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
@@ -103,6 +104,58 @@ define void @caller1(ptr %a) {
 ; DARWIN-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ldp d15, d14, [sp], #160 ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ret
+;
+; WIN-LABEL: caller1:
+; WIN:       .seh_proc caller1
+; WIN-NEXT:  // %bb.0:
+; WIN-NEXT:    stp x19, x20, [sp, #-160]! // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp_x x19, 160
+; WIN-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x21, 16
+; WIN-NEXT:    stp x23, x24, [sp, #32] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x23, 32
+; WIN-NEXT:    stp x25, x26, [sp, #48] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x25, 48
+; WIN-NEXT:    stp x27, x28, [sp, #64] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x27, 64
+; WIN-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_reg x30, 80
+; WIN-NEXT:    stp d8, d9, [sp, #88] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d8, 88
+; WIN-NEXT:    stp d10, d11, [sp, #104] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d10, 104
+; WIN-NEXT:    stp d12, d13, [sp, #120] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d12, 120
+; WIN-NEXT:    stp d14, d15, [sp, #136] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d14, 136
+; WIN-NEXT:    .seh_endprologue
+; WIN-NEXT:    mov x20, x0
+; WIN-NEXT:    bl callee
+; WIN-NEXT:    .seh_startepilogue
+; WIN-NEXT:    ldp d14, d15, [sp, #136] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d14, 136
+; WIN-NEXT:    ldp d12, d13, [sp, #120] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d12, 120
+; WIN-NEXT:    ldp d10, d11, [sp, #104] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d10, 104
+; WIN-NEXT:    ldp d8, d9, [sp, #88] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d8, 88
+; WIN-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_reg x30, 80
+; WIN-NEXT:    ldp x27, x28, [sp, #64] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x27, 64
+; WIN-NEXT:    ldp x25, x26, [sp, #48] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x25, 48
+; WIN-NEXT:    ldp x23, x24, [sp, #32] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x23, 32
+; WIN-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x21, 16
+; WIN-NEXT:    ldp x19, x20, [sp], #160 // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp_x x19, 160
+; WIN-NEXT:    .seh_endepilogue
+; WIN-NEXT:    ret
+; WIN-NEXT:    .seh_endfunclet
+; WIN-NEXT:    .seh_endproc
   tail call preserve_nonecc void @callee(ptr %a)
   ret void
 }
@@ -118,21 +171,24 @@ define preserve_nonecc void @caller2(ptr %a) {
 ; DARWIN-LABEL: caller2:
 ; DARWIN:       ; %bb.0:
 ; DARWIN-NEXT:    b _callee
+;
+; WIN-LABEL: caller2:
+; WIN:       // %bb.0:
+; WIN-NEXT:    b callee
   tail call preserve_nonecc void @callee(ptr %a)
   ret void
 }
 
 ; Preserve_none function can use more registers to pass parameters.
-declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25)
-define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25) {
+declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24)
+define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24) {
 ; CHECK-LABEL: callee_with_many_param:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x8, x15
-; CHECK-NEXT:    mov x15, x19
-; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    mov x15, x20
 ; CHECK-NEXT:    mov x20, x21
 ; CHECK-NEXT:    mov x21, x22
 ; CHECK-NEXT:    mov x22, x23
@@ -149,13 +205,13 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; CHECK-NEXT:    mov x4, x5
 ; CHECK-NEXT:    mov x5, x6
 ; CHECK-NEXT:    mov x6, x7
-; CHECK-NEXT:    mov x7, x9
-; CHECK-NEXT:    mov x9, x10
+; CHECK-NEXT:    mov x7, x10
 ; CHECK-NEXT:    mov x10, x11
 ; CHECK-NEXT:    mov x11, x12
 ; CHECK-NEXT:    mov x12, x13
 ; CHECK-NEXT:    mov x13, x14
-; CHECK-NEXT:    mov x14, x8
+; CHECK-NEXT:    mov x14, x9
+; CHECK-NEXT:    mov x9, x8
 ; CHECK-NEXT:    bl callee_with_many_param2
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -167,8 +223,7 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; DARWIN-NEXT:    .cfi_offset w30, -8
 ; DARWIN-NEXT:    .cfi_offset w29, -16
 ; DARWIN-NEXT:    mov x8, x15
-; DARWIN-NEXT:    mov x15, x19
-; DARWIN-NEXT:    mov x19, x20
+; DARWIN-NEXT:    mov x15, x20
 ; DARWIN-NEXT:    mov x20, x21
 ; DARWIN-NEXT:    mov x21, x22
 ; DARWIN-NEXT:    mov x22, x23
@@ -185,17 +240,62 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; DARWIN-NEXT:    mov x4, x5
 ; DARWIN-NEXT:    mov x5, x6
 ; DARWIN-NEXT:    mov x6, x7
-; DARWIN-NEXT:    mov x7, x9
-; DARWIN-NEXT:    mov x9, x10
+; DARWIN-NEXT:    mov x7, x10
 ; DARWIN-NEXT:    mov x10, x11
 ; DARWIN-NEXT:    mov x11, x12
 ; DARWIN-NEXT:    mov x12, x13
 ; DARWIN-NEXT:    mov x13, x14
-; DARWIN-NEXT:    mov x14, x8
+; DARWIN-NEXT:    mov x14, x9
+; DARWIN-NEXT:    mov x9, x8
 ; DARWIN-NEXT:    bl _callee_with_many_param2
 ; DARWIN-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ret
-  %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25, i64 %a1)
+;
+; WIN-LABEL: callee_with_many_param:
+; WIN:       .seh_proc callee_with_many_param
+; WIN-NEXT:  // %bb.0:
+; WIN-NEXT:    sub sp, sp, #32
+; WIN-NEXT:    .seh_stackalloc 32
+; WIN-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_reg x30, 16
+; WIN-NEXT:    .seh_endprologue
+; WIN-NEXT:    ldr x8, [sp, #32]
+; WIN-NEXT:    mov x15, x20
+; WIN-NEXT:    mov x20, x21
+; WIN-NEXT:    mov x21, x22
+; WIN-NEXT:    mov x22, x23
+; WIN-NEXT:    mov x23, x24
+; WIN-NEXT:    mov x24, x25
+; WIN-NEXT:    mov x25, x26
+; WIN-NEXT:    mov x26, x27
+; WIN-NEXT:    mov x27, x28
+; WIN-NEXT:    mov x28, x0
+; WIN-NEXT:    mov x0, x1
+; WIN-NEXT:    mov x1, x2
+; WIN-NEXT:    mov x2, x3
+; WIN-NEXT:    mov x3, x4
+; WIN-NEXT:    mov x4, x5
+; WIN-NEXT:    mov x5, x6
+; WIN-NEXT:    mov x6, x7
+; WIN-NEXT:    mov x7, x10
+; WIN-NEXT:    mov x10, x11
+; WIN-NEXT:    mov x11, x12
+; WIN-NEXT:    mov x12, x13
+; WIN-NEXT:    mov x13, x14
+; WIN-NEXT:    mov x14, x9
+; WIN-NEXT:    mov x9, x8
+; WIN-NEXT:    str x15, [sp]
+; WIN-NEXT:    bl callee_with_many_param2
+; WIN-NEXT:    .seh_startepilogue
+; WIN-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_reg x30, 16
+; WIN-NEXT:    add sp, sp, #32
+; WIN-NEXT:    .seh_stackalloc 32
+; WIN-NEXT:    .seh_endepilogue
+; WIN-NEXT:    ret
+; WIN-NEXT:    .seh_endfunclet
+; WIN-NEXT:    .seh_endproc
+  %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a1)
   ret i64 %ret
 }
 
@@ -232,27 +332,30 @@ define i64 @caller3() {
 ; CHECK-NEXT:    .cfi_offset b13, -144
 ; CHECK-NEXT:    .cfi_offset b14, -152
 ; CHECK-NEXT:    .cfi_offset b15, -160
-; CHECK-NEXT:    mov w19, #1 // =0x1
-; CHECK-NEXT:    mov w20, #2 // =0x2
-; CHECK-NEXT:    mov w21, #3 // =0x3
-; CHECK-NEXT:    mov w22, #4 // =0x4
-; CHECK-NEXT:    mov w23, #5 // =0x5
-; CHECK-NEXT:    mov w24, #6 // =0x6
-; CHECK-NEXT:    mov w25, #7 // =0x7
-; CHECK-NEXT:    mov w26, #8 // =0x8
-; CHECK-NEXT:    mov w27, #9 // =0x9
-; CHECK-NEXT:    mov w28, #10 // =0xa
-; CHECK-NEXT:    mov w0, #11 // =0xb
-; CHECK-NEXT:    mov w1, #12 // =0xc
-; CHECK-NEXT:    mov w2, #13 // =0xd
-; CHECK-NEXT:    mov w3, #14 // =0xe
-; CHECK-NEXT:    mov w4, #15 // =0xf
-; CHECK-NEXT:    mov w5, #16 // =0x10
-; CHECK-NEXT:    mov w6, #17 // =0x11
-; CHECK-NEXT:    mov w7, #18 // =0x12
-; CHECK-NEXT:    mov w9, #19 // =0x13
-; CHECK-NEXT:    mov w10, #20 // =0x14
-; CHECK-NEXT:    mov w11, #21 // =0x15
+; CHECK-NEXT:    mov w20, #1 // =0x1
+; CHECK-NEXT:    mov w21, #2 // =0x2
+; CHECK-NEXT:    mov w22, #3 // =0x3
+; CHECK-NEXT:    mov w23, #4 // =0x4
+; CHECK-NEXT:    mov w24, #5 // =0x5
+; CHECK-NEXT:    mov w25, #6 // =0x6
+; CHECK-NEXT:    mov w26, #7 // =0x7
+; CHECK-NEXT:    mov w27, #8 // =0x8
+; CHECK-NEXT:    mov w28, #9 // =0x9
+; CHECK-NEXT:    mov w0, #10 // =0xa
+; CHECK-NEXT:    mov w1, #11 // =0xb
+; CHECK-NEXT:    mov w2, #12 // =0xc
+; CHECK-NEXT:    mov w3, #13 // =0xd
+; CHECK-NEXT:    mov w4, #14 // =0xe
+; CHECK-NEXT:    mov w5, #15 // =0xf
+; CHECK-NEXT:    mov w6, #16 // =0x10
+; CHECK-NEXT:    mov w7, #17 // =0x11
+; CHECK-NEXT:    mov w10, #18 // =0x12
+; CHECK-NEXT:    mov w11, #19 // =0x13
+; CHECK-NEXT:    mov w12, #20 // =0x14
+; CHECK-NEXT:    mov w13, #21 // =0x15
+; CHECK-NEXT:    mov w14, #22 // =0x16
+; CHECK-NEXT:    mov w9, #23 // =0x17
+; CHECK-NEXT:    mov w15, #24 // =0x18
 ; CHECK-NEXT:    bl callee_with_many_param
 ; CHECK-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
@@ -299,27 +402,30 @@ define i64 @caller3() {
 ; DARWIN-NEXT:    .cfi_offset b13, -144
 ; DARWIN-NEXT:    .cfi_offset b14, -152
 ; DARWIN-NEXT:    .cfi_offset b15, -160
-; DARWIN-NEXT:    mov w19, #1 ; =0x1
-; DARWIN-NEXT:    mov w20, #2 ; =0x2
-; DARWIN-NEXT:    mov w21, #3 ; =0x3
-; DARWIN-NEXT:    mov w22, #4 ; =0x4
-; DARWIN-NEXT:    mov w23, #5 ; =0x5
-; DARWIN-NEXT:    mov w24, #6 ; =0x6
-; DARWIN-NEXT:    mov w25, #7 ; =0x7
-; DARWIN-NEXT:    mov w26, #8 ; =0x8
-; DARWIN-NEXT:    mov w27, #9 ; =0x9
-; DARWIN-NEXT:    mov w28, #10 ; =0xa
-; DARWIN-NEXT:    mov w0, #11 ; =0xb
-; DARWIN-NEXT:    mov w1, #12 ; =0xc
-; DARWIN-NEXT:    mov w2, #13 ; =0xd
-; DARWIN-NEXT:    mov w3, #14 ; =0xe
-; DARWIN-NEXT:    mov w4, #15 ; =0xf
-; DARWIN-NEXT:    mov w5, #16 ; =0x10
-; DARWIN-NEXT:    mov w6, #17 ; =0x11
-; DARWIN-NEXT:    mov w7, #18 ; =0x12
-; DARWIN-NEXT:    mov w9, #19 ; =0x13
-; DARWIN-NEXT:    mov w10, #20 ; =0x14
-; DARWIN-NEXT:    mov w11, #21 ; =0x15
+; DARWIN-NEXT:    mov w20, #1 ; =0x1
+; DARWIN-NEXT:    mov w21, #2 ; =0x2
+; DARWIN-NEXT:    mov w22, #3 ; =0x3
+; DARWIN-NEXT:    mov w23, #4 ; =0x4
+; DARWIN-NEXT:    mov w24, #5 ; =0x5
+; DARWIN-NEXT:    mov w25, #6 ; =0x6
+; DARWIN-NEXT:    mov w26, #7 ; =0x7
+; DARWIN-NEXT:    mov w27, #8 ; =0x8
+; DARWIN-NEXT:    mov w28, #9 ; =0x9
+; DARWIN-NEXT:    mov w0, #10 ; =0xa
+; DARWIN-NEXT:    mov w1, #11 ; =0xb
+; DARWIN-NEXT:    mov w2, #12 ; =0xc
+; DARWIN-NEXT:    mov w3, #13 ; =0xd
+; DARWIN-NEXT:    mov w4, #14 ; =0xe
+; DARWIN-NEXT:    mov w5, #15 ; =0xf
+; DARWIN-NEXT:    mov w6, #16 ; =0x10
+; DARWIN-NEXT:    mov w7, #17 ; =0x11
+; DARWIN-NEXT:    mov w10, #18 ; =0x12
+; DARWIN-NEXT:    mov w11, #19 ; =0x13
+; DARWIN-NEXT:    mov w12, #20 ; =0x14
+; DARWIN-NEXT:    mov w13, #21 ; =0x15
+; DARWIN-NEXT:    mov w14, #22 ; =0x16
+; DARWIN-NEXT:    mov w9, #23 ; =0x17
+; DARWIN-NEXT:    mov w15, #24 ; =0x18
 ; DARWIN-NEXT:    bl _callee_with_many_param
 ; DARWIN-NEXT:    ldp x29, x30, [sp, #144] ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
@@ -332,6 +438,86 @@ define i64 @caller3() {
 ; DARWIN-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ldp d15, d14, [sp], #160 ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ret
-  %ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21)
+;
+; WIN-LABEL: caller3:
+; WIN:       .seh_proc caller3
+; WIN-NEXT:  // %bb.0:
+; WIN-NEXT:    sub sp, sp, #176
+; WIN-NEXT:    .seh_stackalloc 176
+; WIN-NEXT:    stp x19, x20, [sp, #16] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x19, 16
+; WIN-NEXT:    stp x21, x22, [sp, #32] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x21, 32
+; WIN-NEXT:    stp x23, x24, [sp, #48] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x23, 48
+; WIN-NEXT:    stp x25, x26, [sp, #64] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x25, 64
+; WIN-NEXT:    stp x27, x28, [sp, #80] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_regp x27, 80
+; WIN-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_reg x30, 96
+; WIN-NEXT:    stp d8, d9, [sp, #104] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d8, 104
+; WIN-NEXT:    stp d10, d11, [sp, #120] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d10, 120
+; WIN-NEXT:    stp d12, d13, [sp, #136] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d12, 136
+; WIN-NEXT:    stp d14, d15, [sp, #152] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d14, 152
+; WIN-NEXT:    .seh_endprologue
+; WIN-NEXT:    mov w8, #24 // =0x18
+; WIN-NEXT:    mov w20, #1 // =0x1
+; WIN-NEXT:    mov w21, #2 // =0x2
+; WIN-NEXT:    mov w22, #3 // =0x3
+; WIN-NEXT:    mov w23, #4 // =0x4
+; WIN-NEXT:    mov w24, #5 // =0x5
+; WIN-NEXT:    mov w25, #6 // =0x6
+; WIN-NEXT:    mov w26, #7 // =0x7
+; WIN-NEXT:    mov w27, #8 // =0x8
+; WIN-NEXT:    mov w28, #9 // =0x9
+; WIN-NEXT:    mov w0, #10 // =0xa
+; WIN-NEXT:    mov w1, #11 // =0xb
+; WIN-NEXT:    mov w2, #12 // =0xc
+; WIN-NEXT:    mov w3, #13 // =0xd
+; WIN-NEXT:    mov w4, #14 // =0xe
+; WIN-NEXT:    mov w5, #15 // =0xf
+; WIN-NEXT:    mov w6, #16 // =0x10
+; WIN-NEXT:    mov w7, #17 // =0x11
+; WIN-NEXT:    mov w10, #18 // =0x12
+; WIN-NEXT:    mov w11, #19 // =0x13
+; WIN-NEXT:    mov w12, #20 // =0x14
+; WIN-NEXT:    mov w13, #21 // =0x15
+; WIN-NEXT:    mov w14, #22 // =0x16
+; WIN-NEXT:    mov w9, #23 // =0x17
+; WIN-NEXT:    str x8, [sp]
+; WIN-NEXT:    bl callee_with_many_param
+; WIN-NEXT:    .seh_startepilogue
+; WIN-NEXT:    ldp d14, d15, [sp, #152] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d14, 152
+; WIN-NEXT:    ldp d12, d13, [sp, #136] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d12, 136
+; WIN-NEXT:    ldp d10, d11, [sp, #120] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d10, 120
+; WIN-NEXT:    ldp d8, d9, [sp, #104] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d8, 104
+; WIN-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_reg x30, 96
+; WIN-NEXT:    ldp x27, x28, [sp, #80] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x27, 80
+; WIN-NEXT:    ldp x25, x26, [sp, #64] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x25, 64
+; WIN-NEXT:    ldp x23, x24, [sp, #48] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x23, 48
+; WIN-NEXT:    ldp x21, x22, [sp, #32] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x21, 32
+; WIN-NEXT:    ldp x19, x20, [sp, #16] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_regp x19, 16
+; WIN-NEXT:    add sp, sp, #176
+; WIN-NEXT:    .seh_stackalloc 176
+; WIN-NEXT:    .seh_endepilogue
+; WIN-NEXT:    ret
+; WIN-NEXT:    .seh_endfunclet
+; WIN-NEXT:    .seh_endproc
+  %ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24)
   ret i64 %ret
 }



More information about the cfe-commits mailing list