[llvm] [X86] Align f128 to 16 bytes when passing on x86-32 (PR #138092)

Trevor Gross via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 03:46:23 PDT 2025


https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/138092

>From f3883930e548943ca4e98e542caeeecac1d23118 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Jul 2025 06:26:56 -0400
Subject: [PATCH 1/2] [X86] Update the fp128/i128 test to show stack alignment
 (nfc)

Add a test showing that adding an extra argument before a `fp128` only
changes the stack offset by four bytes. Also set `no_x86_scrub_sp` so
that the CHECK lines show the actual stack offsets.
---
 llvm/test/CodeGen/X86/i128-fp128-abi.ll | 412 ++++++++++++++++--------
 1 file changed, 281 insertions(+), 131 deletions(-)

diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index be8f7923b8f98..ae4a7f12bff50 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
 
 ; Combined ABI tests for fp128 and i128
 
@@ -15,6 +15,153 @@
 ; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs     | FileCheck %s --check-prefix=CHECK-MSVC32
 ; RUN: sed 's/PrimTy/i128/g'  %s | sed 's/Prim0/0/g'    | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs     | FileCheck %s --check-prefix=CHECK-MSVC32
 
+define void @store(PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store:
+; CHECK-X64-F128:       # %bb.0:
+; CHECK-X64-F128-NEXT:    movaps %xmm0, (%rdi)
+; CHECK-X64-F128-NEXT:    retq
+;
+; CHECK-X64-I128-LABEL: store:
+; CHECK-X64-I128:       # %bb.0:
+; CHECK-X64-I128-NEXT:    movq %rsi, 8(%rdx)
+; CHECK-X64-I128-NEXT:    movq %rdi, (%rdx)
+; CHECK-X64-I128-NEXT:    retq
+;
+; CHECK-MSVC64-F128-LABEL: store:
+; CHECK-MSVC64-F128:       # %bb.0:
+; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rdx)
+; CHECK-MSVC64-F128-NEXT:    retq
+;
+; CHECK-MSVC64-I128-LABEL: store:
+; CHECK-MSVC64-I128:       # %bb.0:
+; CHECK-MSVC64-I128-NEXT:    movq %rdx, 8(%r8)
+; CHECK-MSVC64-I128-NEXT:    movq %rcx, (%r8)
+; CHECK-MSVC64-I128-NEXT:    retq
+;
+; CHECK-MINGW-F128-LABEL: store:
+; CHECK-MINGW-F128:       # %bb.0:
+; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rdx)
+; CHECK-MINGW-F128-NEXT:    retq
+;
+; CHECK-MINGW-I128-LABEL: store:
+; CHECK-MINGW-I128:       # %bb.0:
+; CHECK-MINGW-I128-NEXT:    movq %rdx, 8(%r8)
+; CHECK-MINGW-I128-NEXT:    movq %rcx, (%r8)
+; CHECK-MINGW-I128-NEXT:    retq
+;
+; CHECK-X86-LABEL: store:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %ecx
+; CHECK-X86-NEXT:    movl 20(%esp), %edx
+; CHECK-X86-NEXT:    movl 24(%esp), %esi
+; CHECK-X86-NEXT:    movl 28(%esp), %edi
+; CHECK-X86-NEXT:    movl %esi, 12(%edi)
+; CHECK-X86-NEXT:    movl %edx, 8(%edi)
+; CHECK-X86-NEXT:    movl %ecx, 4(%edi)
+; CHECK-X86-NEXT:    movl %eax, (%edi)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC32-LABEL: store:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 16(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 20(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 24(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT:    movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT:    movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT:    movl %eax, (%edi)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+  store PrimTy %x, ptr %p
+  ret void
+}
+
+; Illustrate stack alignment
+define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store_perturbed:
+; CHECK-X64-F128:       # %bb.0:
+; CHECK-X64-F128-NEXT:    movaps %xmm0, (%rsi)
+; CHECK-X64-F128-NEXT:    retq
+;
+; CHECK-X64-I128-LABEL: store_perturbed:
+; CHECK-X64-I128:       # %bb.0:
+; CHECK-X64-I128-NEXT:    movq %rdx, 8(%rcx)
+; CHECK-X64-I128-NEXT:    movq %rsi, (%rcx)
+; CHECK-X64-I128-NEXT:    retq
+;
+; CHECK-MSVC64-F128-LABEL: store_perturbed:
+; CHECK-MSVC64-F128:       # %bb.0:
+; CHECK-MSVC64-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%r8)
+; CHECK-MSVC64-F128-NEXT:    retq
+;
+; CHECK-MSVC64-I128-LABEL: store_perturbed:
+; CHECK-MSVC64-I128:       # %bb.0:
+; CHECK-MSVC64-I128-NEXT:    movq %r8, 8(%r9)
+; CHECK-MSVC64-I128-NEXT:    movq %rdx, (%r9)
+; CHECK-MSVC64-I128-NEXT:    retq
+;
+; CHECK-MINGW-F128-LABEL: store_perturbed:
+; CHECK-MINGW-F128:       # %bb.0:
+; CHECK-MINGW-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%r8)
+; CHECK-MINGW-F128-NEXT:    retq
+;
+; CHECK-MINGW-I128-LABEL: store_perturbed:
+; CHECK-MINGW-I128:       # %bb.0:
+; CHECK-MINGW-I128-NEXT:    movq %r8, 8(%r9)
+; CHECK-MINGW-I128-NEXT:    movq %rdx, (%r9)
+; CHECK-MINGW-I128-NEXT:    retq
+;
+; CHECK-X86-LABEL: store_perturbed:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 20(%esp), %ecx
+; CHECK-X86-NEXT:    movl 24(%esp), %edx
+; CHECK-X86-NEXT:    movl 28(%esp), %esi
+; CHECK-X86-NEXT:    movl 32(%esp), %edi
+; CHECK-X86-NEXT:    movl %esi, 12(%edi)
+; CHECK-X86-NEXT:    movl %edx, 8(%edi)
+; CHECK-X86-NEXT:    movl %ecx, 4(%edi)
+; CHECK-X86-NEXT:    movl %eax, (%edi)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC32-LABEL: store_perturbed:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl 16(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 20(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 24(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 28(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 32(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT:    movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT:    movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT:    movl %eax, (%edi)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+  store PrimTy %x, ptr %p
+  ret void
+}
+
 define PrimTy @return(ptr %p) nounwind {
 ; CHECK-X64-F128-LABEL: return:
 ; CHECK-X64-F128:       # %bb.0:
@@ -53,8 +200,8 @@ define PrimTy @return(ptr %p) nounwind {
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %ecx
 ; CHECK-X86-NEXT:    movl (%ecx), %edx
 ; CHECK-X86-NEXT:    movl 4(%ecx), %esi
 ; CHECK-X86-NEXT:    movl 8(%ecx), %edi
@@ -71,8 +218,8 @@ define PrimTy @return(ptr %p) nounwind {
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 16(%esp), %ecx
 ; CHECK-MSVC32-NEXT:    movl (%ecx), %edx
 ; CHECK-MSVC32-NEXT:    movl 4(%ecx), %esi
 ; CHECK-MSVC32-NEXT:    movl 8(%ecx), %edi
@@ -123,11 +270,11 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %ecx
+; CHECK-X86-NEXT:    movl 20(%esp), %edx
+; CHECK-X86-NEXT:    movl 24(%esp), %esi
+; CHECK-X86-NEXT:    movl 28(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
@@ -140,11 +287,11 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 16(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 20(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 24(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 28(%esp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
@@ -155,6 +302,8 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
   ret PrimTy %x
 }
 
+; Leading args such that i128 is the last possible position where it still
+; gets passed in registers.
 define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounwind {
 ; CHECK-X64-F128-LABEL: leading_args:
 ; CHECK-X64-F128:       # %bb.0:
@@ -168,37 +317,37 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
 ;
 ; CHECK-MSVC64-F128-LABEL: leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq 40(%rsp), %rax
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: leading_args:
 ; CHECK-MSVC64-I128:       # %bb.0:
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT:    movq 40(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT:    movq 48(%rsp), %rdx
 ; CHECK-MSVC64-I128-NEXT:    retq
 ;
 ; CHECK-MINGW-F128-LABEL: leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq 40(%rsp), %rax
 ; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: leading_args:
 ; CHECK-MINGW-I128:       # %bb.0:
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT:    movq 40(%rsp), %rax
+; CHECK-MINGW-I128-NEXT:    movq 48(%rsp), %rdx
 ; CHECK-MINGW-I128-NEXT:    retq
 ;
 ; CHECK-X86-LABEL: leading_args:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 48(%esp), %ecx
+; CHECK-X86-NEXT:    movl 52(%esp), %edx
+; CHECK-X86-NEXT:    movl 56(%esp), %esi
+; CHECK-X86-NEXT:    movl 60(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
@@ -211,11 +360,11 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 48(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 52(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 56(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 60(%esp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
@@ -226,6 +375,7 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
   ret PrimTy %x
 }
 
+; The i128 of interest must be in memory.
 define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy %_5, PrimTy %x) nounwind {
 ; CHECK-X64-F128-LABEL: many_leading_args:
 ; CHECK-X64-F128:       # %bb.0:
@@ -234,43 +384,43 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
 ;
 ; CHECK-X64-I128-LABEL: many_leading_args:
 ; CHECK-X64-I128:       # %bb.0:
-; CHECK-X64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT:    movq 24(%rsp), %rax
+; CHECK-X64-I128-NEXT:    movq 32(%rsp), %rdx
 ; CHECK-X64-I128-NEXT:    retq
 ;
 ; CHECK-MSVC64-F128-LABEL: many_leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq 56(%rsp), %rax
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: many_leading_args:
 ; CHECK-MSVC64-I128:       # %bb.0:
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT:    movq 64(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT:    movq 72(%rsp), %rdx
 ; CHECK-MSVC64-I128-NEXT:    retq
 ;
 ; CHECK-MINGW-F128-LABEL: many_leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq 56(%rsp), %rax
 ; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: many_leading_args:
 ; CHECK-MINGW-I128:       # %bb.0:
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT:    movq 64(%rsp), %rax
+; CHECK-MINGW-I128-NEXT:    movq 72(%rsp), %rdx
 ; CHECK-MINGW-I128-NEXT:    retq
 ;
 ; CHECK-X86-LABEL: many_leading_args:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 72(%esp), %ecx
+; CHECK-X86-NEXT:    movl 76(%esp), %edx
+; CHECK-X86-NEXT:    movl 80(%esp), %esi
+; CHECK-X86-NEXT:    movl 84(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
@@ -283,11 +433,11 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 72(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 76(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 80(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 84(%esp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
@@ -305,43 +455,43 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
 ;
 ; CHECK-X64-I128-LABEL: trailing_arg:
 ; CHECK-X64-I128:       # %bb.0:
-; CHECK-X64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT:    movq 8(%rsp), %rax
+; CHECK-X64-I128-NEXT:    movq 16(%rsp), %rdx
 ; CHECK-X64-I128-NEXT:    retq
 ;
 ; CHECK-MSVC64-F128-LABEL: trailing_arg:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq 48(%rsp), %rax
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: trailing_arg:
 ; CHECK-MSVC64-I128:       # %bb.0:
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT:    movq 48(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT:    movq 56(%rsp), %rdx
 ; CHECK-MSVC64-I128-NEXT:    retq
 ;
 ; CHECK-MINGW-F128-LABEL: trailing_arg:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq 48(%rsp), %rax
 ; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: trailing_arg:
 ; CHECK-MINGW-I128:       # %bb.0:
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT:    movq 48(%rsp), %rax
+; CHECK-MINGW-I128-NEXT:    movq 56(%rsp), %rdx
 ; CHECK-MINGW-I128-NEXT:    retq
 ;
 ; CHECK-X86-LABEL: trailing_arg:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl 12(%esp), %eax
+; CHECK-X86-NEXT:    movl 56(%esp), %ecx
+; CHECK-X86-NEXT:    movl 60(%esp), %edx
+; CHECK-X86-NEXT:    movl 64(%esp), %esi
+; CHECK-X86-NEXT:    movl 68(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
@@ -354,11 +504,11 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl 56(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl 60(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl 64(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl 68(%esp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
@@ -388,8 +538,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ; CHECK-MSVC64-F128:       # %bb.0:
 ; CHECK-MSVC64-F128-NEXT:    subq $56, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 32(%rsp), %rcx
 ; CHECK-MSVC64-F128-NEXT:    callq first_arg
 ; CHECK-MSVC64-F128-NEXT:    addq $56, %rsp
 ; CHECK-MSVC64-F128-NEXT:    retq
@@ -405,8 +555,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ; CHECK-MINGW-F128:       # %bb.0:
 ; CHECK-MINGW-F128-NEXT:    subq $56, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 32(%rsp), %rcx
 ; CHECK-MINGW-F128-NEXT:    callq first_arg
 ; CHECK-MINGW-F128-NEXT:    addq $56, %rsp
 ; CHECK-MINGW-F128-NEXT:    retq
@@ -421,11 +571,11 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ; CHECK-X86-LABEL: call_first_arg:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    leal 12(%esp), %eax
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
 ; CHECK-X86-NEXT:    pushl %eax
 ; CHECK-X86-NEXT:    calll first_arg@PLT
 ; CHECK-X86-NEXT:    addl $56, %esp
@@ -481,9 +631,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ; CHECK-MSVC64-F128:       # %bb.0:
 ; CHECK-MSVC64-F128-NEXT:    subq $72, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
@@ -495,8 +645,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ; CHECK-MSVC64-I128-LABEL: call_leading_args:
 ; CHECK-MSVC64-I128:       # %bb.0:
 ; CHECK-MSVC64-I128-NEXT:    subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rcx, 32(%rsp)
 ; CHECK-MSVC64-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-I128-NEXT:    xorl %r8d, %r8d
@@ -509,9 +659,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ; CHECK-MINGW-F128:       # %bb.0:
 ; CHECK-MINGW-F128-NEXT:    subq $72, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
@@ -523,8 +673,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ; CHECK-MINGW-I128-LABEL: call_leading_args:
 ; CHECK-MINGW-I128:       # %bb.0:
 ; CHECK-MINGW-I128-NEXT:    subq $56, %rsp
-; CHECK-MINGW-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rcx, 32(%rsp)
 ; CHECK-MINGW-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-I128-NEXT:    xorl %r8d, %r8d
@@ -536,11 +686,11 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ; CHECK-X86-LABEL: call_leading_args:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    leal 12(%esp), %eax
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0
@@ -620,12 +770,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ; CHECK-MSVC64-F128-NEXT:    subq $88, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    xorps %xmm1, %xmm1
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm1, 64(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 64(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
@@ -637,10 +787,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ; CHECK-MSVC64-I128-LABEL: call_many_leading_args:
 ; CHECK-MSVC64-I128:       # %bb.0:
 ; CHECK-MSVC64-I128-NEXT:    subq $72, %rsp
-; CHECK-MSVC64-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT:    movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rdx, 56(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rcx, 48(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq $0, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq $0, 32(%rsp)
 ; CHECK-MSVC64-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-I128-NEXT:    xorl %r8d, %r8d
@@ -654,12 +804,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ; CHECK-MINGW-F128-NEXT:    subq $88, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MINGW-F128-NEXT:    xorps %xmm1, %xmm1
-; CHECK-MINGW-F128-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT:    movaps %xmm1, 64(%rsp)
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 64(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
@@ -671,10 +821,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ; CHECK-MINGW-I128-LABEL: call_many_leading_args:
 ; CHECK-MINGW-I128:       # %bb.0:
 ; CHECK-MINGW-I128-NEXT:    subq $72, %rsp
-; CHECK-MINGW-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT:    movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rdx, 56(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rcx, 48(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq $0, 40(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq $0, 32(%rsp)
 ; CHECK-MINGW-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-I128-NEXT:    xorl %r8d, %r8d
@@ -686,11 +836,11 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ; CHECK-X86-LABEL: call_many_leading_args:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    leal 12(%esp), %eax
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0
@@ -770,9 +920,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ; CHECK-MSVC64-F128:       # %bb.0:
 ; CHECK-MSVC64-F128-NEXT:    subq $72, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
@@ -784,8 +934,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ; CHECK-MSVC64-I128-LABEL: call_trailing_arg:
 ; CHECK-MSVC64-I128:       # %bb.0:
 ; CHECK-MSVC64-I128-NEXT:    subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT:    movq %rcx, 32(%rsp)
 ; CHECK-MSVC64-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MSVC64-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-I128-NEXT:    xorl %r8d, %r8d
@@ -798,9 +948,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ; CHECK-MINGW-F128:       # %bb.0:
 ; CHECK-MINGW-F128-NEXT:    subq $72, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
 ; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
@@ -812,8 +962,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ; CHECK-MINGW-I128-LABEL: call_trailing_arg:
 ; CHECK-MINGW-I128:       # %bb.0:
 ; CHECK-MINGW-I128-NEXT:    subq $56, %rsp
-; CHECK-MINGW-I128-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT:    movq %rcx, 32(%rsp)
 ; CHECK-MINGW-I128-NEXT:    xorl %ecx, %ecx
 ; CHECK-MINGW-I128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-I128-NEXT:    xorl %r8d, %r8d
@@ -825,11 +975,11 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ; CHECK-X86-LABEL: call_trailing_arg:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    leal 12(%esp), %eax
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
+; CHECK-X86-NEXT:    pushl 56(%esp)
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0
 ; CHECK-X86-NEXT:    pushl $0

>From 3b2a3857bfe5c4d15ad08d4f3393034c33bd51f1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Jul 2025 05:45:02 -0400
Subject: [PATCH 2/2] [X86] Align f128 and i128 to 16 bytes when passing on
 x86-32

The i386 psABI specifies that `__float128` has 16-byte alignment and
must be passed on the stack; however, LLVM currently stores it in a
stack slot that is only 4-byte aligned. Add a custom lowering to correct
this alignment to 16 bytes.

i386 does not specify an `__int128`, but it seems reasonable to keep the
same behavior as `__float128` so this is changed as well.

Fixes: https://github.com/llvm/llvm-project/issues/77401
---
 llvm/docs/ReleaseNotes.md                   |   2 +
 llvm/lib/Target/X86/X86CallingConv.cpp      |  32 ++
 llvm/lib/Target/X86/X86CallingConv.td       |   5 +
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp |  15 +-
 llvm/test/CodeGen/X86/i128-fp128-abi.ll     | 454 ++++++++++++--------
 5 files changed, 321 insertions(+), 187 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index daf822388a2ff..e91460d3a551c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -228,6 +228,8 @@ Changes to the X86 Backend
 --------------------------
 
 * `fp128` will now use `*f128` libcalls on 32-bit GNU targets as well.
+* On x86-32, `fp128` and `i128` are now passed with the expected 16-byte stack
+  alignment.
 
 Changes to the OCaml bindings
 -----------------------------
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 0b4c63f7a81f7..f456132ac4992 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -374,5 +374,37 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return true;
 }
 
+/// Special handling for i128 and fp128: on x86-32, i128 and fp128 get legalized
+/// as four i32s, but fp128 must be passed on the stack with 16-byte alignment.
+/// Technically only fp128 has a specified ABI, but it makes sense to handle
+/// i128 the same until we hear differently. Returns true on every path.
+static bool CC_X86_32_I128_FP128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                 CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(ValVT == MVT::i32 && "Should have i32 parts");
+
+  // Queue this part; no location is assigned until the last part arrives.
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  // Checked directly (no local) so release builds have no unused variable.
+  assert(PendingMembers.size() == 4 && "Should have four parts");
+
+  // Reserve one 16-byte slot aligned to 16 bytes and place the four i32 parts
+  // at consecutive 4-byte offsets inside it.
+  int64_t Offset = State.AllocateStack(16, Align(16));
+  for (CCValAssign &Member : PendingMembers) {
+    Member.convertToMem(Offset);
+    State.addLoc(Member);
+    Offset += 4;
+  }
+
+  PendingMembers.clear();
+  return true;
+}
+
 // Provides entry points of CC_X86 and RetCC_X86.
 #include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 823e0caa02262..f020e0b55141c 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -859,6 +859,11 @@ def CC_X86_32_C : CallingConv<[
   // The 'nest' parameter, if any, is passed in ECX.
   CCIfNest<CCAssignToReg<[ECX]>>,
 
+  // i128 and fp128 need to be passed on the stack with a higher alignment than
+  // their legal types. Handle this with a custom function.
+  CCIfType<[i32],
+           CCIfConsecutiveRegs<CCCustom<"CC_X86_32_I128_FP128">>>,
+
   // On swifttailcc pass swiftself in ECX.
   CCIfCC<"CallingConv::SwiftTail",
          CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[ECX]>>>>,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 9ad355311527b..b4639ac2577e8 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -237,9 +237,18 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
 bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
     Type *Ty, CallingConv::ID CallConv, bool isVarArg,
     const DataLayout &DL) const {
-  // i128 split into i64 needs to be allocated to two consecutive registers,
-  // or spilled to the stack as a whole.
-  return Ty->isIntegerTy(128);
+  // On x86-64 i128 is split into two i64s and needs to be allocated to two
+  // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
+  // is split to four i32s and never actually passed in registers, but we use
+  // the consecutive register mark to match it in TableGen.
+  if (Ty->isIntegerTy(128))
+    return true;
+
+  // On x86-32, fp128 acts the same as i128.
+  if (Subtarget.is32Bit() && Ty->isFP128Ty())
+    return true;
+
+  return false;
 }
 
 /// Helper for getByValTypeAlignment to determine
diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index ae4a7f12bff50..2174d5056e6ce 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -55,34 +55,41 @@ define void @store(PrimTy %x, ptr %p) nounwind {
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 12(%esp), %eax
-; CHECK-X86-NEXT:    movl 16(%esp), %ecx
-; CHECK-X86-NEXT:    movl 20(%esp), %edx
-; CHECK-X86-NEXT:    movl 24(%esp), %esi
-; CHECK-X86-NEXT:    movl 28(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 20(%esp), %ecx
+; CHECK-X86-NEXT:    movl 24(%esp), %edx
+; CHECK-X86-NEXT:    movl 28(%esp), %esi
+; CHECK-X86-NEXT:    movl 32(%esp), %edi
 ; CHECK-X86-NEXT:    movl %esi, 12(%edi)
 ; CHECK-X86-NEXT:    movl %edx, 8(%edi)
 ; CHECK-X86-NEXT:    movl %ecx, 4(%edi)
 ; CHECK-X86-NEXT:    movl %eax, (%edi)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: store:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 16(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 20(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 24(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 24(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %esi, 12(%edi)
 ; CHECK-MSVC32-NEXT:    movl %edx, 8(%edi)
 ; CHECK-MSVC32-NEXT:    movl %ecx, 4(%edi)
 ; CHECK-MSVC32-NEXT:    movl %eax, (%edi)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   store PrimTy %x, ptr %p
   ret void
@@ -129,34 +136,41 @@ define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind {
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 16(%esp), %eax
-; CHECK-X86-NEXT:    movl 20(%esp), %ecx
-; CHECK-X86-NEXT:    movl 24(%esp), %edx
-; CHECK-X86-NEXT:    movl 28(%esp), %esi
-; CHECK-X86-NEXT:    movl 32(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 32(%esp), %eax
+; CHECK-X86-NEXT:    movl 36(%esp), %ecx
+; CHECK-X86-NEXT:    movl 40(%esp), %edx
+; CHECK-X86-NEXT:    movl 44(%esp), %esi
+; CHECK-X86-NEXT:    movl 48(%esp), %edi
 ; CHECK-X86-NEXT:    movl %esi, 12(%edi)
 ; CHECK-X86-NEXT:    movl %edx, 8(%edi)
 ; CHECK-X86-NEXT:    movl %ecx, 4(%edi)
 ; CHECK-X86-NEXT:    movl %eax, (%edi)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: store_perturbed:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 16(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 20(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 24(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 28(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 32(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 24(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 28(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 32(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 36(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 40(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %esi, 12(%edi)
 ; CHECK-MSVC32-NEXT:    movl %edx, 8(%edi)
 ; CHECK-MSVC32-NEXT:    movl %ecx, 4(%edi)
 ; CHECK-MSVC32-NEXT:    movl %eax, (%edi)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   store PrimTy %x, ptr %p
   ret void
@@ -270,34 +284,41 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 12(%esp), %eax
-; CHECK-X86-NEXT:    movl 16(%esp), %ecx
-; CHECK-X86-NEXT:    movl 20(%esp), %edx
-; CHECK-X86-NEXT:    movl 24(%esp), %esi
-; CHECK-X86-NEXT:    movl 28(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 32(%esp), %ecx
+; CHECK-X86-NEXT:    movl 36(%esp), %edx
+; CHECK-X86-NEXT:    movl 40(%esp), %esi
+; CHECK-X86-NEXT:    movl 44(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
 ; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl $4
 ;
 ; CHECK-MSVC32-LABEL: first_arg:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 16(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 20(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 24(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 24(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 28(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 32(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 36(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
 ; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   ret PrimTy %x
 }
@@ -343,34 +364,41 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 12(%esp), %eax
-; CHECK-X86-NEXT:    movl 48(%esp), %ecx
-; CHECK-X86-NEXT:    movl 52(%esp), %edx
-; CHECK-X86-NEXT:    movl 56(%esp), %esi
-; CHECK-X86-NEXT:    movl 60(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 64(%esp), %ecx
+; CHECK-X86-NEXT:    movl 68(%esp), %edx
+; CHECK-X86-NEXT:    movl 72(%esp), %esi
+; CHECK-X86-NEXT:    movl 76(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
 ; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl $4
 ;
 ; CHECK-MSVC32-LABEL: leading_args:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 48(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 52(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 56(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 60(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 68(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
 ; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   ret PrimTy %x
 }
@@ -416,34 +444,41 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 12(%esp), %eax
-; CHECK-X86-NEXT:    movl 72(%esp), %ecx
-; CHECK-X86-NEXT:    movl 76(%esp), %edx
-; CHECK-X86-NEXT:    movl 80(%esp), %esi
-; CHECK-X86-NEXT:    movl 84(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 80(%esp), %ecx
+; CHECK-X86-NEXT:    movl 84(%esp), %edx
+; CHECK-X86-NEXT:    movl 88(%esp), %esi
+; CHECK-X86-NEXT:    movl 92(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
 ; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl $4
 ;
 ; CHECK-MSVC32-LABEL: many_leading_args:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 72(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 76(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 80(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 84(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 72(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 76(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 80(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 84(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
 ; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   ret PrimTy %x
 }
@@ -487,34 +522,41 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    pushl %edi
 ; CHECK-X86-NEXT:    pushl %esi
-; CHECK-X86-NEXT:    movl 12(%esp), %eax
-; CHECK-X86-NEXT:    movl 56(%esp), %ecx
-; CHECK-X86-NEXT:    movl 60(%esp), %edx
-; CHECK-X86-NEXT:    movl 64(%esp), %esi
-; CHECK-X86-NEXT:    movl 68(%esp), %edi
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    movl 16(%esp), %eax
+; CHECK-X86-NEXT:    movl 64(%esp), %ecx
+; CHECK-X86-NEXT:    movl 68(%esp), %edx
+; CHECK-X86-NEXT:    movl 72(%esp), %esi
+; CHECK-X86-NEXT:    movl 76(%esp), %edi
 ; CHECK-X86-NEXT:    movl %edi, 12(%eax)
 ; CHECK-X86-NEXT:    movl %esi, 8(%eax)
 ; CHECK-X86-NEXT:    movl %edx, 4(%eax)
 ; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    addl $4, %esp
 ; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    popl %edi
 ; CHECK-X86-NEXT:    retl $4
 ;
 ; CHECK-MSVC32-LABEL: trailing_arg:
 ; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
 ; CHECK-MSVC32-NEXT:    pushl %edi
 ; CHECK-MSVC32-NEXT:    pushl %esi
-; CHECK-MSVC32-NEXT:    movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT:    movl 56(%esp), %ecx
-; CHECK-MSVC32-NEXT:    movl 60(%esp), %edx
-; CHECK-MSVC32-NEXT:    movl 64(%esp), %esi
-; CHECK-MSVC32-NEXT:    movl 68(%esp), %edi
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl 68(%ebp), %edi
 ; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
 ; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
 ; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
 ; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    leal -8(%ebp), %esp
 ; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   ret PrimTy %x
 }
@@ -570,32 +612,43 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-X86-LABEL: call_first_arg:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal 12(%esp), %eax
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    subl $56, %esp
+; CHECK-X86-NEXT:    movl 64(%esp), %eax
+; CHECK-X86-NEXT:    movl 68(%esp), %ecx
+; CHECK-X86-NEXT:    movl 72(%esp), %edx
+; CHECK-X86-NEXT:    movl 76(%esp), %esi
+; CHECK-X86-NEXT:    movl %esi, 28(%esp)
+; CHECK-X86-NEXT:    movl %edx, 24(%esp)
+; CHECK-X86-NEXT:    movl %ecx, 20(%esp)
+; CHECK-X86-NEXT:    movl %eax, 16(%esp)
+; CHECK-X86-NEXT:    leal 32(%esp), %eax
+; CHECK-X86-NEXT:    movl %eax, (%esp)
 ; CHECK-X86-NEXT:    calll first_arg@PLT
-; CHECK-X86-NEXT:    addl $56, %esp
+; CHECK-X86-NEXT:    addl $52, %esp
+; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: call_first_arg:
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %ebp
 ; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    pushl %esi
 ; CHECK-MSVC32-NEXT:    andl $-16, %esp
-; CHECK-MSVC32-NEXT:    subl $32, %esp
-; CHECK-MSVC32-NEXT:    movl %esp, %eax
-; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
-; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    subl $64, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl %esi, 28(%esp)
+; CHECK-MSVC32-NEXT:    movl %edx, 24(%esp)
+; CHECK-MSVC32-NEXT:    movl %ecx, 20(%esp)
+; CHECK-MSVC32-NEXT:    movl %eax, 16(%esp)
+; CHECK-MSVC32-NEXT:    leal 32(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl %eax, (%esp)
 ; CHECK-MSVC32-NEXT:    calll _first_arg
-; CHECK-MSVC32-NEXT:    addl $20, %esp
-; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   call PrimTy @first_arg(PrimTy %x)
@@ -685,48 +738,59 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-X86-LABEL: call_leading_args:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal 12(%esp), %eax
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    subl $88, %esp
+; CHECK-X86-NEXT:    movl 96(%esp), %eax
+; CHECK-X86-NEXT:    movl 100(%esp), %ecx
+; CHECK-X86-NEXT:    movl 104(%esp), %edx
+; CHECK-X86-NEXT:    movl 108(%esp), %esi
+; CHECK-X86-NEXT:    movl %esi, 60(%esp)
+; CHECK-X86-NEXT:    movl %edx, 56(%esp)
+; CHECK-X86-NEXT:    movl %ecx, 52(%esp)
+; CHECK-X86-NEXT:    movl %eax, 48(%esp)
+; CHECK-X86-NEXT:    leal 64(%esp), %eax
+; CHECK-X86-NEXT:    movl %eax, (%esp)
+; CHECK-X86-NEXT:    movl $0, 32(%esp)
+; CHECK-X86-NEXT:    movl $0, 28(%esp)
+; CHECK-X86-NEXT:    movl $0, 24(%esp)
+; CHECK-X86-NEXT:    movl $0, 20(%esp)
+; CHECK-X86-NEXT:    movl $0, 16(%esp)
+; CHECK-X86-NEXT:    movl $0, 12(%esp)
+; CHECK-X86-NEXT:    movl $0, 8(%esp)
+; CHECK-X86-NEXT:    movl $0, 4(%esp)
 ; CHECK-X86-NEXT:    calll leading_args@PLT
-; CHECK-X86-NEXT:    addl $88, %esp
+; CHECK-X86-NEXT:    addl $84, %esp
+; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: call_leading_args:
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %ebp
 ; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    pushl %esi
 ; CHECK-MSVC32-NEXT:    andl $-16, %esp
-; CHECK-MSVC32-NEXT:    subl $32, %esp
-; CHECK-MSVC32-NEXT:    movl %esp, %eax
-; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    subl $96, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT:    movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT:    movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT:    movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT:    leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl %eax, (%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 4(%esp)
 ; CHECK-MSVC32-NEXT:    calll _leading_args
-; CHECK-MSVC32-NEXT:    addl $52, %esp
-; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   call PrimTy @leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy %x)
@@ -835,56 +899,67 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-X86-LABEL: call_many_leading_args:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal 12(%esp), %eax
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    subl $104, %esp
+; CHECK-X86-NEXT:    movl 112(%esp), %eax
+; CHECK-X86-NEXT:    movl 116(%esp), %ecx
+; CHECK-X86-NEXT:    movl 120(%esp), %edx
+; CHECK-X86-NEXT:    movl 124(%esp), %esi
+; CHECK-X86-NEXT:    movl %esi, 76(%esp)
+; CHECK-X86-NEXT:    movl %edx, 72(%esp)
+; CHECK-X86-NEXT:    movl %ecx, 68(%esp)
+; CHECK-X86-NEXT:    movl %eax, 64(%esp)
+; CHECK-X86-NEXT:    leal 80(%esp), %eax
+; CHECK-X86-NEXT:    movl %eax, (%esp)
+; CHECK-X86-NEXT:    movl $0, 60(%esp)
+; CHECK-X86-NEXT:    movl $0, 56(%esp)
+; CHECK-X86-NEXT:    movl $0, 52(%esp)
+; CHECK-X86-NEXT:    movl $0, 48(%esp)
+; CHECK-X86-NEXT:    movl $0, 32(%esp)
+; CHECK-X86-NEXT:    movl $0, 28(%esp)
+; CHECK-X86-NEXT:    movl $0, 24(%esp)
+; CHECK-X86-NEXT:    movl $0, 20(%esp)
+; CHECK-X86-NEXT:    movl $0, 16(%esp)
+; CHECK-X86-NEXT:    movl $0, 12(%esp)
+; CHECK-X86-NEXT:    movl $0, 8(%esp)
+; CHECK-X86-NEXT:    movl $0, 4(%esp)
 ; CHECK-X86-NEXT:    calll many_leading_args@PLT
-; CHECK-X86-NEXT:    addl $104, %esp
+; CHECK-X86-NEXT:    addl $100, %esp
+; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: call_many_leading_args:
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %ebp
 ; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    pushl %esi
 ; CHECK-MSVC32-NEXT:    andl $-16, %esp
-; CHECK-MSVC32-NEXT:    subl $32, %esp
-; CHECK-MSVC32-NEXT:    movl %esp, %eax
-; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    subl $112, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl %esi, 76(%esp)
+; CHECK-MSVC32-NEXT:    movl %edx, 72(%esp)
+; CHECK-MSVC32-NEXT:    movl %ecx, 68(%esp)
+; CHECK-MSVC32-NEXT:    movl %eax, 64(%esp)
+; CHECK-MSVC32-NEXT:    leal 80(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl %eax, (%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 60(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 56(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 52(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 48(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 4(%esp)
 ; CHECK-MSVC32-NEXT:    calll _many_leading_args
-; CHECK-MSVC32-NEXT:    addl $68, %esp
-; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   call PrimTy @many_leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy Prim0, PrimTy %x)
@@ -974,48 +1049,59 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-X86-LABEL: call_trailing_arg:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    subl $40, %esp
-; CHECK-X86-NEXT:    leal 12(%esp), %eax
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl 56(%esp)
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl $0
-; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    subl $88, %esp
+; CHECK-X86-NEXT:    movl 96(%esp), %eax
+; CHECK-X86-NEXT:    movl 100(%esp), %ecx
+; CHECK-X86-NEXT:    movl 104(%esp), %edx
+; CHECK-X86-NEXT:    movl 108(%esp), %esi
+; CHECK-X86-NEXT:    movl %esi, 60(%esp)
+; CHECK-X86-NEXT:    movl %edx, 56(%esp)
+; CHECK-X86-NEXT:    movl %ecx, 52(%esp)
+; CHECK-X86-NEXT:    movl %eax, 48(%esp)
+; CHECK-X86-NEXT:    leal 64(%esp), %eax
+; CHECK-X86-NEXT:    movl %eax, (%esp)
+; CHECK-X86-NEXT:    movl $0, 32(%esp)
+; CHECK-X86-NEXT:    movl $0, 28(%esp)
+; CHECK-X86-NEXT:    movl $0, 24(%esp)
+; CHECK-X86-NEXT:    movl $0, 20(%esp)
+; CHECK-X86-NEXT:    movl $0, 16(%esp)
+; CHECK-X86-NEXT:    movl $0, 12(%esp)
+; CHECK-X86-NEXT:    movl $0, 8(%esp)
+; CHECK-X86-NEXT:    movl $0, 4(%esp)
 ; CHECK-X86-NEXT:    calll trailing_arg@PLT
-; CHECK-X86-NEXT:    addl $88, %esp
+; CHECK-X86-NEXT:    addl $84, %esp
+; CHECK-X86-NEXT:    popl %esi
 ; CHECK-X86-NEXT:    retl
 ;
 ; CHECK-MSVC32-LABEL: call_trailing_arg:
 ; CHECK-MSVC32:       # %bb.0:
 ; CHECK-MSVC32-NEXT:    pushl %ebp
 ; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    pushl %esi
 ; CHECK-MSVC32-NEXT:    andl $-16, %esp
-; CHECK-MSVC32-NEXT:    subl $32, %esp
-; CHECK-MSVC32-NEXT:    movl %esp, %eax
-; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
-; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl $0
-; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    subl $96, %esp
+; CHECK-MSVC32-NEXT:    movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT:    movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT:    movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT:    movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT:    movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT:    movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT:    movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT:    movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT:    leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl %eax, (%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT:    movl $0, 4(%esp)
 ; CHECK-MSVC32-NEXT:    calll _trailing_arg
-; CHECK-MSVC32-NEXT:    addl $52, %esp
-; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT:    popl %esi
 ; CHECK-MSVC32-NEXT:    popl %ebp
 ; CHECK-MSVC32-NEXT:    retl
   call PrimTy @trailing_arg(i64 0, i64 0, i64 0, i64 0, PrimTy %x)



More information about the llvm-commits mailing list