[llvm] [X86] Align f128 and i128 to 16 bytes when passing on x86-32 (PR #138092)
Trevor Gross via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 05:10:31 PDT 2025
https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/138092
>From f3883930e548943ca4e98e542caeeecac1d23118 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Jul 2025 06:26:56 -0400
Subject: [PATCH 1/3] [X86] Update the fp128/i128 test to show stack alignment
(nfc)
Add a test showing that adding an extra argument before a `fp128` only
changes the stack offset by four bytes. Also set `--no_x86_scrub_sp` so
that the generated checks keep the concrete stack offsets being tested.
---
llvm/test/CodeGen/X86/i128-fp128-abi.ll | 412 ++++++++++++++++--------
1 file changed, 281 insertions(+), 131 deletions(-)
diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index be8f7923b8f98..ae4a7f12bff50 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
; Combined ABI tests for fp128 and i128
@@ -15,6 +15,153 @@
; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32
; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32
+define void @store(PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store:
+; CHECK-X64-F128: # %bb.0:
+; CHECK-X64-F128-NEXT: movaps %xmm0, (%rdi)
+; CHECK-X64-F128-NEXT: retq
+;
+; CHECK-X64-I128-LABEL: store:
+; CHECK-X64-I128: # %bb.0:
+; CHECK-X64-I128-NEXT: movq %rsi, 8(%rdx)
+; CHECK-X64-I128-NEXT: movq %rdi, (%rdx)
+; CHECK-X64-I128-NEXT: retq
+;
+; CHECK-MSVC64-F128-LABEL: store:
+; CHECK-MSVC64-F128: # %bb.0:
+; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rdx)
+; CHECK-MSVC64-F128-NEXT: retq
+;
+; CHECK-MSVC64-I128-LABEL: store:
+; CHECK-MSVC64-I128: # %bb.0:
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 8(%r8)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, (%r8)
+; CHECK-MSVC64-I128-NEXT: retq
+;
+; CHECK-MINGW-F128-LABEL: store:
+; CHECK-MINGW-F128: # %bb.0:
+; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rdx)
+; CHECK-MINGW-F128-NEXT: retq
+;
+; CHECK-MINGW-I128-LABEL: store:
+; CHECK-MINGW-I128: # %bb.0:
+; CHECK-MINGW-I128-NEXT: movq %rdx, 8(%r8)
+; CHECK-MINGW-I128-NEXT: movq %rcx, (%r8)
+; CHECK-MINGW-I128-NEXT: retq
+;
+; CHECK-X86-LABEL: store:
+; CHECK-X86: # %bb.0:
+; CHECK-X86-NEXT: pushl %edi
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 16(%esp), %ecx
+; CHECK-X86-NEXT: movl 20(%esp), %edx
+; CHECK-X86-NEXT: movl 24(%esp), %esi
+; CHECK-X86-NEXT: movl 28(%esp), %edi
+; CHECK-X86-NEXT: movl %esi, 12(%edi)
+; CHECK-X86-NEXT: movl %edx, 8(%edi)
+; CHECK-X86-NEXT: movl %ecx, 4(%edi)
+; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: popl %esi
+; CHECK-X86-NEXT: popl %edi
+; CHECK-X86-NEXT: retl
+;
+; CHECK-MSVC32-LABEL: store:
+; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %edi
+; CHECK-MSVC32-NEXT: pushl %esi
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 20(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 24(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: popl %esi
+; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: retl
+ store PrimTy %x, ptr %p
+ ret void
+}
+
+; Illustrate stack alignment
+define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store_perturbed:
+; CHECK-X64-F128: # %bb.0:
+; CHECK-X64-F128-NEXT: movaps %xmm0, (%rsi)
+; CHECK-X64-F128-NEXT: retq
+;
+; CHECK-X64-I128-LABEL: store_perturbed:
+; CHECK-X64-I128: # %bb.0:
+; CHECK-X64-I128-NEXT: movq %rdx, 8(%rcx)
+; CHECK-X64-I128-NEXT: movq %rsi, (%rcx)
+; CHECK-X64-I128-NEXT: retq
+;
+; CHECK-MSVC64-F128-LABEL: store_perturbed:
+; CHECK-MSVC64-F128: # %bb.0:
+; CHECK-MSVC64-F128-NEXT: movaps (%rdx), %xmm0
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%r8)
+; CHECK-MSVC64-F128-NEXT: retq
+;
+; CHECK-MSVC64-I128-LABEL: store_perturbed:
+; CHECK-MSVC64-I128: # %bb.0:
+; CHECK-MSVC64-I128-NEXT: movq %r8, 8(%r9)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, (%r9)
+; CHECK-MSVC64-I128-NEXT: retq
+;
+; CHECK-MINGW-F128-LABEL: store_perturbed:
+; CHECK-MINGW-F128: # %bb.0:
+; CHECK-MINGW-F128-NEXT: movaps (%rdx), %xmm0
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%r8)
+; CHECK-MINGW-F128-NEXT: retq
+;
+; CHECK-MINGW-I128-LABEL: store_perturbed:
+; CHECK-MINGW-I128: # %bb.0:
+; CHECK-MINGW-I128-NEXT: movq %r8, 8(%r9)
+; CHECK-MINGW-I128-NEXT: movq %rdx, (%r9)
+; CHECK-MINGW-I128-NEXT: retq
+;
+; CHECK-X86-LABEL: store_perturbed:
+; CHECK-X86: # %bb.0:
+; CHECK-X86-NEXT: pushl %edi
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 20(%esp), %ecx
+; CHECK-X86-NEXT: movl 24(%esp), %edx
+; CHECK-X86-NEXT: movl 28(%esp), %esi
+; CHECK-X86-NEXT: movl 32(%esp), %edi
+; CHECK-X86-NEXT: movl %esi, 12(%edi)
+; CHECK-X86-NEXT: movl %edx, 8(%edi)
+; CHECK-X86-NEXT: movl %ecx, 4(%edi)
+; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: popl %esi
+; CHECK-X86-NEXT: popl %edi
+; CHECK-X86-NEXT: retl
+;
+; CHECK-MSVC32-LABEL: store_perturbed:
+; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %edi
+; CHECK-MSVC32-NEXT: pushl %esi
+; CHECK-MSVC32-NEXT: movl 16(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 20(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 24(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 28(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 32(%esp), %edi
+; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: popl %esi
+; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: retl
+ store PrimTy %x, ptr %p
+ ret void
+}
+
define PrimTy @return(ptr %p) nounwind {
; CHECK-X64-F128-LABEL: return:
; CHECK-X64-F128: # %bb.0:
@@ -53,8 +200,8 @@ define PrimTy @return(ptr %p) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 16(%esp), %ecx
; CHECK-X86-NEXT: movl (%ecx), %edx
; CHECK-X86-NEXT: movl 4(%ecx), %esi
; CHECK-X86-NEXT: movl 8(%ecx), %edi
@@ -71,8 +218,8 @@ define PrimTy @return(ptr %p) nounwind {
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
; CHECK-MSVC32-NEXT: movl (%ecx), %edx
; CHECK-MSVC32-NEXT: movl 4(%ecx), %esi
; CHECK-MSVC32-NEXT: movl 8(%ecx), %edi
@@ -123,11 +270,11 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 16(%esp), %ecx
+; CHECK-X86-NEXT: movl 20(%esp), %edx
+; CHECK-X86-NEXT: movl 24(%esp), %esi
+; CHECK-X86-NEXT: movl 28(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
@@ -140,11 +287,11 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 20(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 24(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 28(%esp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
@@ -155,6 +302,8 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
ret PrimTy %x
}
+; Leading args such that i128 is the last possible position where it still
+; gets passed in registers.
define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounwind {
; CHECK-X64-F128-LABEL: leading_args:
; CHECK-X64-F128: # %bb.0:
@@ -168,37 +317,37 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
;
; CHECK-MSVC64-F128-LABEL: leading_args:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 40(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: leading_args:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 40(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 48(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: leading_args:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 40(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: leading_args:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 40(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 48(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 48(%esp), %ecx
+; CHECK-X86-NEXT: movl 52(%esp), %edx
+; CHECK-X86-NEXT: movl 56(%esp), %esi
+; CHECK-X86-NEXT: movl 60(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
@@ -211,11 +360,11 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 48(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 52(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 56(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 60(%esp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
@@ -226,6 +375,7 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
ret PrimTy %x
}
+; The i128 of interest must be in memory.
define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy %_5, PrimTy %x) nounwind {
; CHECK-X64-F128-LABEL: many_leading_args:
; CHECK-X64-F128: # %bb.0:
@@ -234,43 +384,43 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
;
; CHECK-X64-I128-LABEL: many_leading_args:
; CHECK-X64-I128: # %bb.0:
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT: movq 24(%rsp), %rax
+; CHECK-X64-I128-NEXT: movq 32(%rsp), %rdx
; CHECK-X64-I128-NEXT: retq
;
; CHECK-MSVC64-F128-LABEL: many_leading_args:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 56(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: many_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 64(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 72(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: many_leading_args:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 56(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: many_leading_args:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 64(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 72(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: many_leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 72(%esp), %ecx
+; CHECK-X86-NEXT: movl 76(%esp), %edx
+; CHECK-X86-NEXT: movl 80(%esp), %esi
+; CHECK-X86-NEXT: movl 84(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
@@ -283,11 +433,11 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 72(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 76(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 80(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 84(%esp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
@@ -305,43 +455,43 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
;
; CHECK-X64-I128-LABEL: trailing_arg:
; CHECK-X64-I128: # %bb.0:
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT: movq 8(%rsp), %rax
+; CHECK-X64-I128-NEXT: movq 16(%rsp), %rdx
; CHECK-X64-I128-NEXT: retq
;
; CHECK-MSVC64-F128-LABEL: trailing_arg:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 48(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: trailing_arg:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 48(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 56(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: trailing_arg:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 48(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: trailing_arg:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 48(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 56(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: trailing_arg:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 56(%esp), %ecx
+; CHECK-X86-NEXT: movl 60(%esp), %edx
+; CHECK-X86-NEXT: movl 64(%esp), %esi
+; CHECK-X86-NEXT: movl 68(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
@@ -354,11 +504,11 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 56(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 60(%esp), %edx
+; CHECK-MSVC32-NEXT: movl 64(%esp), %esi
+; CHECK-MSVC32-NEXT: movl 68(%esp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
@@ -388,8 +538,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $56, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 32(%rsp), %rcx
; CHECK-MSVC64-F128-NEXT: callq first_arg
; CHECK-MSVC64-F128-NEXT: addq $56, %rsp
; CHECK-MSVC64-F128-NEXT: retq
@@ -405,8 +555,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $56, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 32(%rsp), %rcx
; CHECK-MINGW-F128-NEXT: callq first_arg
; CHECK-MINGW-F128-NEXT: addq $56, %rsp
; CHECK-MINGW-F128-NEXT: retq
@@ -421,11 +571,11 @@ define void @call_first_arg(PrimTy %x) nounwind {
; CHECK-X86-LABEL: call_first_arg:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT: leal 12(%esp), %eax
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
; CHECK-X86-NEXT: pushl %eax
; CHECK-X86-NEXT: calll first_arg@PLT
; CHECK-X86-NEXT: addl $56, %esp
@@ -481,9 +631,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $72, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -495,8 +645,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -509,9 +659,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $72, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -523,8 +673,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_leading_args:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $56, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -536,11 +686,11 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-X86-LABEL: call_leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT: leal 12(%esp), %eax
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
@@ -620,12 +770,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-F128-NEXT: subq $88, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
; CHECK-MSVC64-F128-NEXT: xorps %xmm1, %xmm1
-; CHECK-MSVC64-F128-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm1, 64(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 64(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -637,10 +787,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_many_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $72, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 56(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 48(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq $0, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq $0, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -654,12 +804,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-F128-NEXT: subq $88, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
; CHECK-MINGW-F128-NEXT: xorps %xmm1, %xmm1
-; CHECK-MINGW-F128-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm1, 64(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 64(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -671,10 +821,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_many_leading_args:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $72, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 56(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 48(%rsp)
+; CHECK-MINGW-I128-NEXT: movq $0, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq $0, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -686,11 +836,11 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-X86-LABEL: call_many_leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT: leal 12(%esp), %eax
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
@@ -770,9 +920,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $72, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -784,8 +934,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_trailing_arg:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -798,9 +948,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $72, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -812,8 +962,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_trailing_arg:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $56, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -825,11 +975,11 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-X86-LABEL: call_trailing_arg:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT: leal 12(%esp), %eax
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
+; CHECK-X86-NEXT: pushl 56(%esp)
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
; CHECK-X86-NEXT: pushl $0
>From ee3b81ff3185294c8ba5feaef9531985a9fc0e1b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Jul 2025 05:45:02 -0400
Subject: [PATCH 2/3] [X86] Align f128 and i128 to 16 bytes when passing on
x86-32
The i386 psABI specifies that `__float128` has 16 byte alignment and
must be passed on the stack; however, LLVM currently stores it in a
stack slot that has an offset of 4. Add a custom lowering to correct
this alignment to 16 bytes.
i386 does not specify an `__int128`, but it seems reasonable to keep the
same behavior as `__float128` so this is changed as well.
Fixes: https://github.com/llvm/llvm-project/issues/77401
---
llvm/docs/ReleaseNotes.md | 2 +
llvm/lib/Target/X86/X86CallingConv.cpp | 32 ++
llvm/lib/Target/X86/X86CallingConv.td | 5 +
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 15 +-
llvm/test/CodeGen/X86/i128-fp128-abi.ll | 454 ++++++++++++--------
5 files changed, 321 insertions(+), 187 deletions(-)
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index daf822388a2ff..e91460d3a551c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -228,6 +228,8 @@ Changes to the X86 Backend
--------------------------
* `fp128` will now use `*f128` libcalls on 32-bit GNU targets as well.
+* On x86-32, `fp128` and `i128` are now passed with the expected 16-byte stack
+ alignment.
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 0b4c63f7a81f7..eb39259f7166b 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -374,5 +374,37 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
+/// Special handling for i128 and fp128: on x86-32, i128 and fp128 get legalized
+/// as four i32s, but fp128 must be passed on the stack with 16-byte alignment.
+/// Technically only fp128 has a specified ABI, but it makes sense to handle
+/// i128 the same until we hear differently.
+static bool CC_X86_32_I128_FP128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                 CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(ValVT == MVT::i32 && "Should have i32 parts");
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  // Size is checked inline so NDEBUG builds are not left with an unused local.
+  assert(PendingMembers.size() == 4 && "Should have four parts");
+  // Place all four parts in one 16-byte-aligned stack slot, 4 bytes apart.
+  int64_t Offset = State.AllocateStack(16, Align(16));
+  PendingMembers[0].convertToMem(Offset);
+  PendingMembers[1].convertToMem(Offset + 4);
+  PendingMembers[2].convertToMem(Offset + 8);
+  PendingMembers[3].convertToMem(Offset + 12);
+
+  State.addLoc(PendingMembers[0]);
+  State.addLoc(PendingMembers[1]);
+  State.addLoc(PendingMembers[2]);
+  State.addLoc(PendingMembers[3]);
+  PendingMembers.clear();
+  return true;
+}
+
// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 823e0caa02262..f020e0b55141c 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -859,6 +859,11 @@ def CC_X86_32_C : CallingConv<[
// The 'nest' parameter, if any, is passed in ECX.
CCIfNest<CCAssignToReg<[ECX]>>,
+ // i128 and fp128 need to be passed on the stack with a higher alignment than
+ // their legal types. Handle this with a custom function.
+ CCIfType<[i32],
+ CCIfConsecutiveRegs<CCCustom<"CC_X86_32_I128_FP128">>>,
+
// On swifttailcc pass swiftself in ECX.
CCIfCC<"CallingConv::SwiftTail",
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[ECX]>>>>,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 9ad355311527b..b4639ac2577e8 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -237,9 +237,18 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
- // i128 split into i64 needs to be allocated to two consecutive registers,
- // or spilled to the stack as a whole.
- return Ty->isIntegerTy(128);
+ // On x86-64 i128 is split into two i64s and needs to be allocated to two
+ // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
+ // is split into four i32s and never actually passed in registers, but we use
+ // the consecutive register mark to match it in TableGen.
+ if (Ty->isIntegerTy(128))
+ return true;
+
+ // On x86-32, fp128 acts the same as i128.
+ if (Subtarget.is32Bit() && Ty->isFP128Ty())
+ return true;
+
+ return false;
}
/// Helper for getByValTypeAlignment to determine
diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index ae4a7f12bff50..2174d5056e6ce 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -55,34 +55,41 @@ define void @store(PrimTy %x, ptr %p) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 12(%esp), %eax
-; CHECK-X86-NEXT: movl 16(%esp), %ecx
-; CHECK-X86-NEXT: movl 20(%esp), %edx
-; CHECK-X86-NEXT: movl 24(%esp), %esi
-; CHECK-X86-NEXT: movl 28(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 20(%esp), %ecx
+; CHECK-X86-NEXT: movl 24(%esp), %edx
+; CHECK-X86-NEXT: movl 28(%esp), %esi
+; CHECK-X86-NEXT: movl 32(%esp), %edi
; CHECK-X86-NEXT: movl %esi, 12(%edi)
; CHECK-X86-NEXT: movl %edx, 8(%edi)
; CHECK-X86-NEXT: movl %ecx, 4(%edi)
; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: store:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 20(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 24(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
store PrimTy %x, ptr %p
ret void
@@ -129,34 +136,41 @@ define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 16(%esp), %eax
-; CHECK-X86-NEXT: movl 20(%esp), %ecx
-; CHECK-X86-NEXT: movl 24(%esp), %edx
-; CHECK-X86-NEXT: movl 28(%esp), %esi
-; CHECK-X86-NEXT: movl 32(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 32(%esp), %eax
+; CHECK-X86-NEXT: movl 36(%esp), %ecx
+; CHECK-X86-NEXT: movl 40(%esp), %edx
+; CHECK-X86-NEXT: movl 44(%esp), %esi
+; CHECK-X86-NEXT: movl 48(%esp), %edi
; CHECK-X86-NEXT: movl %esi, 12(%edi)
; CHECK-X86-NEXT: movl %edx, 8(%edi)
; CHECK-X86-NEXT: movl %ecx, 4(%edi)
; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: store_perturbed:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 16(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 20(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 24(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 28(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 32(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 28(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 32(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 36(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 40(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
store PrimTy %x, ptr %p
ret void
@@ -270,34 +284,41 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 12(%esp), %eax
-; CHECK-X86-NEXT: movl 16(%esp), %ecx
-; CHECK-X86-NEXT: movl 20(%esp), %edx
-; CHECK-X86-NEXT: movl 24(%esp), %esi
-; CHECK-X86-NEXT: movl 28(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 32(%esp), %ecx
+; CHECK-X86-NEXT: movl 36(%esp), %edx
+; CHECK-X86-NEXT: movl 40(%esp), %esi
+; CHECK-X86-NEXT: movl 44(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: first_arg:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 20(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 24(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 28(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 28(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 32(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 36(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -343,34 +364,41 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 12(%esp), %eax
-; CHECK-X86-NEXT: movl 48(%esp), %ecx
-; CHECK-X86-NEXT: movl 52(%esp), %edx
-; CHECK-X86-NEXT: movl 56(%esp), %esi
-; CHECK-X86-NEXT: movl 60(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 64(%esp), %ecx
+; CHECK-X86-NEXT: movl 68(%esp), %edx
+; CHECK-X86-NEXT: movl 72(%esp), %esi
+; CHECK-X86-NEXT: movl 76(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: leading_args:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 48(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 52(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 56(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 60(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -416,34 +444,41 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 12(%esp), %eax
-; CHECK-X86-NEXT: movl 72(%esp), %ecx
-; CHECK-X86-NEXT: movl 76(%esp), %edx
-; CHECK-X86-NEXT: movl 80(%esp), %esi
-; CHECK-X86-NEXT: movl 84(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 80(%esp), %ecx
+; CHECK-X86-NEXT: movl 84(%esp), %edx
+; CHECK-X86-NEXT: movl 88(%esp), %esi
+; CHECK-X86-NEXT: movl 92(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: many_leading_args:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 72(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 76(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 80(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 84(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 72(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 76(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 80(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 84(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -487,34 +522,41 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl 12(%esp), %eax
-; CHECK-X86-NEXT: movl 56(%esp), %ecx
-; CHECK-X86-NEXT: movl 60(%esp), %edx
-; CHECK-X86-NEXT: movl 64(%esp), %esi
-; CHECK-X86-NEXT: movl 68(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 64(%esp), %ecx
+; CHECK-X86-NEXT: movl 68(%esp), %edx
+; CHECK-X86-NEXT: movl 72(%esp), %esi
+; CHECK-X86-NEXT: movl 76(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: trailing_arg:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
-; CHECK-MSVC32-NEXT: movl 56(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl 60(%esp), %edx
-; CHECK-MSVC32-NEXT: movl 64(%esp), %esi
-; CHECK-MSVC32-NEXT: movl 68(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -570,32 +612,43 @@ define void @call_first_arg(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_first_arg:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal 12(%esp), %eax
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $56, %esp
+; CHECK-X86-NEXT: movl 64(%esp), %eax
+; CHECK-X86-NEXT: movl 68(%esp), %ecx
+; CHECK-X86-NEXT: movl 72(%esp), %edx
+; CHECK-X86-NEXT: movl 76(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 28(%esp)
+; CHECK-X86-NEXT: movl %edx, 24(%esp)
+; CHECK-X86-NEXT: movl %ecx, 20(%esp)
+; CHECK-X86-NEXT: movl %eax, 16(%esp)
+; CHECK-X86-NEXT: leal 32(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
; CHECK-X86-NEXT: calll first_arg at PLT
-; CHECK-X86-NEXT: addl $56, %esp
+; CHECK-X86-NEXT: addl $52, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_first_arg:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $64, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 28(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 24(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 20(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 16(%esp)
+; CHECK-MSVC32-NEXT: leal 32(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
; CHECK-MSVC32-NEXT: calll _first_arg
-; CHECK-MSVC32-NEXT: addl $20, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @first_arg(PrimTy %x)
@@ -685,48 +738,59 @@ define void @call_leading_args(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_leading_args:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal 12(%esp), %eax
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $88, %esp
+; CHECK-X86-NEXT: movl 96(%esp), %eax
+; CHECK-X86-NEXT: movl 100(%esp), %ecx
+; CHECK-X86-NEXT: movl 104(%esp), %edx
+; CHECK-X86-NEXT: movl 108(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 60(%esp)
+; CHECK-X86-NEXT: movl %edx, 56(%esp)
+; CHECK-X86-NEXT: movl %ecx, 52(%esp)
+; CHECK-X86-NEXT: movl %eax, 48(%esp)
+; CHECK-X86-NEXT: leal 64(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll leading_args at PLT
-; CHECK-X86-NEXT: addl $88, %esp
+; CHECK-X86-NEXT: addl $84, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_leading_args:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $96, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT: leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _leading_args
-; CHECK-MSVC32-NEXT: addl $52, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy %x)
@@ -835,56 +899,67 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_many_leading_args:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal 12(%esp), %eax
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $104, %esp
+; CHECK-X86-NEXT: movl 112(%esp), %eax
+; CHECK-X86-NEXT: movl 116(%esp), %ecx
+; CHECK-X86-NEXT: movl 120(%esp), %edx
+; CHECK-X86-NEXT: movl 124(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 76(%esp)
+; CHECK-X86-NEXT: movl %edx, 72(%esp)
+; CHECK-X86-NEXT: movl %ecx, 68(%esp)
+; CHECK-X86-NEXT: movl %eax, 64(%esp)
+; CHECK-X86-NEXT: leal 80(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 60(%esp)
+; CHECK-X86-NEXT: movl $0, 56(%esp)
+; CHECK-X86-NEXT: movl $0, 52(%esp)
+; CHECK-X86-NEXT: movl $0, 48(%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll many_leading_args at PLT
-; CHECK-X86-NEXT: addl $104, %esp
+; CHECK-X86-NEXT: addl $100, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_many_leading_args:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $112, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 76(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 72(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 68(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 64(%esp)
+; CHECK-MSVC32-NEXT: leal 80(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 60(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 56(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 52(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 48(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _many_leading_args
-; CHECK-MSVC32-NEXT: addl $68, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @many_leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy Prim0, PrimTy %x)
@@ -974,48 +1049,59 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_trailing_arg:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal 12(%esp), %eax
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl 56(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $88, %esp
+; CHECK-X86-NEXT: movl 96(%esp), %eax
+; CHECK-X86-NEXT: movl 100(%esp), %ecx
+; CHECK-X86-NEXT: movl 104(%esp), %edx
+; CHECK-X86-NEXT: movl 108(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 60(%esp)
+; CHECK-X86-NEXT: movl %edx, 56(%esp)
+; CHECK-X86-NEXT: movl %ecx, 52(%esp)
+; CHECK-X86-NEXT: movl %eax, 48(%esp)
+; CHECK-X86-NEXT: leal 64(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll trailing_arg at PLT
-; CHECK-X86-NEXT: addl $88, %esp
+; CHECK-X86-NEXT: addl $84, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_trailing_arg:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $96, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT: leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _trailing_arg
-; CHECK-MSVC32-NEXT: addl $52, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @trailing_arg(i64 0, i64 0, i64 0, i64 0, PrimTy %x)
>From cb407387ba4db54e24a7b9cdb84c8e6e93113c64 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Jul 2025 12:08:55 +0000
Subject: [PATCH 3/3] Bless tests
---
llvm/test/CodeGen/X86/abds-neg.ll | 410 ++--
llvm/test/CodeGen/X86/abds.ll | 390 ++--
llvm/test/CodeGen/X86/abdu-neg.ll | 282 +--
llvm/test/CodeGen/X86/abdu.ll | 225 +-
llvm/test/CodeGen/X86/abs.ll | 55 +-
llvm/test/CodeGen/X86/add-sub-bool.ll | 25 +-
llvm/test/CodeGen/X86/all-ones-vector.ll | 12 +-
llvm/test/CodeGen/X86/arg-copy-elide.ll | 8 +-
llvm/test/CodeGen/X86/avx512fp16-cvt.ll | 42 +-
llvm/test/CodeGen/X86/bitselect.ll | 55 +-
llvm/test/CodeGen/X86/bsf.ll | 144 +-
llvm/test/CodeGen/X86/bsr.ll | 158 +-
llvm/test/CodeGen/X86/bswap-wide-int.ll | 30 +-
.../X86/div-rem-pair-recomposition-signed.ll | 36 +-
.../div-rem-pair-recomposition-unsigned.ll | 94 +-
llvm/test/CodeGen/X86/fp128-cast-strict.ll | 92 +-
llvm/test/CodeGen/X86/fp128-cast.ll | 125 +-
.../test/CodeGen/X86/fp128-libcalls-strict.ll | 2060 ++++++++++-------
llvm/test/CodeGen/X86/fp128-libcalls.ll | 1773 ++++++++------
llvm/test/CodeGen/X86/fshl.ll | 185 +-
llvm/test/CodeGen/X86/fshr.ll | 170 +-
llvm/test/CodeGen/X86/funnel-shift.ll | 74 +-
llvm/test/CodeGen/X86/i128-add.ll | 23 +-
llvm/test/CodeGen/X86/i128-sdiv.ll | 375 ++-
llvm/test/CodeGen/X86/i128-udiv.ll | 597 ++++-
llvm/test/CodeGen/X86/iabs.ll | 43 +-
llvm/test/CodeGen/X86/icmp-shift-opt.ll | 102 +-
llvm/test/CodeGen/X86/mul128.ll | 97 +-
llvm/test/CodeGen/X86/neg-abs.ll | 55 +-
llvm/test/CodeGen/X86/popcnt.ll | 485 ++--
llvm/test/CodeGen/X86/pr46004.ll | 19 +
llvm/test/CodeGen/X86/scalar-fp-to-i32.ll | 76 +-
llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 76 +-
llvm/test/CodeGen/X86/scmp.ll | 39 +-
llvm/test/CodeGen/X86/sdiv_fix.ll | 99 +-
llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 440 ++--
llvm/test/CodeGen/X86/shift-combine.ll | 14 +-
llvm/test/CodeGen/X86/shift-i128.ll | 72 +-
llvm/test/CodeGen/X86/smax.ll | 78 +-
llvm/test/CodeGen/X86/smin.ll | 81 +-
llvm/test/CodeGen/X86/ucmp.ll | 34 +-
llvm/test/CodeGen/X86/udiv_fix.ll | 28 +-
llvm/test/CodeGen/X86/udiv_fix_sat.ll | 28 +-
llvm/test/CodeGen/X86/umax.ll | 135 +-
llvm/test/CodeGen/X86/umin.ll | 81 +-
.../X86/umulo-128-legalisation-lowering.ll | 6 +-
llvm/test/CodeGen/X86/wide-integer-cmp.ll | 14 +-
47 files changed, 5939 insertions(+), 3603 deletions(-)
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index f6d66ab47ce05..2911edfbfd409 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -367,44 +367,49 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -438,44 +443,49 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -639,55 +649,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovll %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovll 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovll 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovll 36(%ebp), %ebx
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -848,37 +862,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovgel (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovgel %ebx, %esi
-; X86-NEXT: cmovgel %ebp, %ecx
-; X86-NEXT: cmovgel %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %ebx
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovgel %edi, %esi
+; X86-NEXT: cmovgel %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1118,35 +1136,39 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %esi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1175,35 +1197,39 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %esi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 0356c2702a419..a1a4ba81ae493 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -343,37 +343,41 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -404,37 +408,41 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -585,37 +593,41 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -768,37 +780,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1027,35 +1043,38 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_subnsw_i128:
@@ -1079,35 +1098,38 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_subnsw_i128_undef:
@@ -1282,37 +1304,41 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_select_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index 6bda99c89a37e..b7c34070f1af6 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -355,39 +355,43 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -423,39 +427,43 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -621,55 +629,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovbl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovbl 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovbl %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovbl 36(%ebp), %ebx
+; X86-NEXT: cmovbl 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: cmovbl 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -827,39 +839,43 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 27acec32fd348..043c9155f52f9 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -326,35 +326,38 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128:
@@ -381,35 +384,38 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128_undef:
@@ -548,35 +554,38 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_minmax_i128:
@@ -717,35 +726,38 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_cmp_i128:
@@ -887,35 +899,38 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_select_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_select_i128:
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index bae140abdf6b1..e252d5953e60e 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -144,31 +144,34 @@ define i128 @test_i128(i128 %a) nounwind {
;
; X86-LABEL: test_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%r = call i128 @llvm.abs.i128(i128 %a, i1 false)
ret i128 %r
@@ -688,13 +691,17 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind {
;
; X86-LABEL: test_sextinreg_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 24(%ebp), %esi
; X86-NEXT: xorl %edx, %esi
; X86-NEXT: subl %edx, %esi
; X86-NEXT: sbbl %edx, %ecx
@@ -702,7 +709,9 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind {
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
+; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%shl = shl i128 %a, 64
%ashr = ashr exact i128 %shl, 64
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index c2bfcf57185e3..1df284fb9fe2c 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -104,18 +104,21 @@ define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-LABEL: test_i128_add_add_idx:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: btl $5, {{[0-9]+}}(%esp)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: addl 24(%ebp), %esi
+; X86-NEXT: adcl 28(%ebp), %edi
+; X86-NEXT: adcl 32(%ebp), %ecx
+; X86-NEXT: adcl 36(%ebp), %edx
+; X86-NEXT: btl $5, 64(%ebp)
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ecx
@@ -124,8 +127,10 @@ define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test_i128_add_add_idx:
diff --git a/llvm/test/CodeGen/X86/all-ones-vector.ll b/llvm/test/CodeGen/X86/all-ones-vector.ll
index d624f6c13e367..a280eedbd43ad 100644
--- a/llvm/test/CodeGen/X86/all-ones-vector.ll
+++ b/llvm/test/CodeGen/X86/all-ones-vector.ll
@@ -272,7 +272,7 @@ define <64 x i8> @allones_v64i8() nounwind {
;
; AVX512-LABEL: allones_v64i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
@@ -301,7 +301,7 @@ define <32 x i16> @allones_v32i16() nounwind {
;
; AVX512-LABEL: allones_v32i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <32 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
}
@@ -330,7 +330,7 @@ define <16 x i32> @allones_v16i32() nounwind {
;
; AVX512-LABEL: allones_v16i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
}
@@ -359,7 +359,7 @@ define <8 x i64> @allones_v8i64() nounwind {
;
; AVX512-LABEL: allones_v8i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
}
@@ -388,7 +388,7 @@ define <8 x double> @allones_v8f64() nounwind {
;
; AVX512-LABEL: allones_v8f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <8 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff>
}
@@ -417,7 +417,7 @@ define <16 x float> @allones_v16f32() nounwind {
;
; AVX512-LABEL: allones_v16f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: ret{{[l|q]}}
ret <16 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000>
}
diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 0eb2c630e6818..f13627b55856f 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -188,11 +188,11 @@ define void @split_i128(ptr %sret, i128 %x) {
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $48, %esp
-; CHECK-NEXT: movl 12(%ebp), %eax
+; CHECK-NEXT: movl 24(%ebp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl 16(%ebp), %ebx
-; CHECK-NEXT: movl 20(%ebp), %esi
-; CHECK-NEXT: movl 24(%ebp), %edi
+; CHECK-NEXT: movl 28(%ebp), %ebx
+; CHECK-NEXT: movl 32(%ebp), %esi
+; CHECK-NEXT: movl 36(%ebp), %edi
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index f66f0c0ceabc4..cc58bc1e44f37 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -628,13 +628,19 @@ define half @s128_to_half(i128 %x) {
;
; X86-LABEL: s128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __floattihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
%a = sitofp i128 %x to half
ret half %a
@@ -713,13 +719,19 @@ define half @u128_to_half(i128 %x) {
;
; X86-LABEL: u128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __floatuntihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
%a = uitofp i128 %x to half
ret half %a
@@ -1020,11 +1032,15 @@ define half @f128_to_half(fp128 %x) nounwind {
;
; X86-LABEL: f128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __trunctfhf2
-; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
%a = fptrunc fp128 %x to half
ret half %a
diff --git a/llvm/test/CodeGen/X86/bitselect.ll b/llvm/test/CodeGen/X86/bitselect.ll
index 4fc0827ac4dd6..33381313d3c19 100644
--- a/llvm/test/CodeGen/X86/bitselect.ll
+++ b/llvm/test/CodeGen/X86/bitselect.ll
@@ -146,37 +146,40 @@ define i64 @bitselect_i64(i64 %a, i64 %b, i64 %m) nounwind {
define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
; X86-LABEL: bitselect_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: andl 56(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: andl 60(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: andl 64(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: andl 68(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-NOBMI-LABEL: bitselect_i128:
diff --git a/llvm/test/CodeGen/X86/bsf.ll b/llvm/test/CodeGen/X86/bsf.ll
index 312f94c041235..143e10e6909e4 100644
--- a/llvm/test/CodeGen/X86/bsf.ll
+++ b/llvm/test/CodeGen/X86/bsf.ll
@@ -263,70 +263,78 @@ define i128 @cmov_bsf128(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsf128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %eax, %edx
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: jne .LBB8_3
; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: rep bsfl %edi, %esi
-; X86-NEXT: addl $32, %esi
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB8_7
-; X86-NEXT: .LBB8_6:
-; X86-NEXT: rep bsfl %eax, %edx
-; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: rep bsfl %esi, %eax
+; X86-NEXT: addl $32, %eax
+; X86-NEXT: jmp .LBB8_5
; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: xorl %ebp, %ebp
-; X86-NEXT: movl $128, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: movl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: jmp .LBB8_11
; X86-NEXT: .LBB8_3:
-; X86-NEXT: rep bsfl %ecx, %esi
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: rep bsfl %ecx, %eax
+; X86-NEXT: .LBB8_5: # %cond.false
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB8_6
-; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: rep bsfl %ebp, %edx
+; X86-NEXT: # %bb.7: # %cond.false
+; X86-NEXT: rep bsfl %ebx, %edx
; X86-NEXT: addl $32, %edx
+; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: .LBB8_6:
+; X86-NEXT: rep bsfl %edi, %edx
; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB8_10
; X86-NEXT: # %bb.9: # %cond.false
; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: xorl %ebp, %ebp
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: jne .LBB8_13
-; X86-NEXT: # %bb.12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: .LBB8_13: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: je .LBB8_12
+; X86-NEXT: # %bb.13: # %cond.end
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB8_14
+; X86-NEXT: .LBB8_12:
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: .LBB8_14: # %cond.end
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -361,46 +369,49 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsf128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: orl %ebx, %ebp
-; X86-NEXT: orl %edi, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: je .LBB9_11
; X86-NEXT: # %bb.1: # %select.true.sink
; X86-NEXT: testl %edx, %edx
; X86-NEXT: jne .LBB9_2
; X86-NEXT: # %bb.3: # %select.true.sink
-; X86-NEXT: rep bsfl %ecx, %edi
-; X86-NEXT: addl $32, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %ecx, %ebx
+; X86-NEXT: addl $32, %ebx
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: je .LBB9_6
; X86-NEXT: .LBB9_5:
-; X86-NEXT: rep bsfl %ebx, %esi
+; X86-NEXT: rep bsfl %edi, %esi
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: je .LBB9_8
; X86-NEXT: jmp .LBB9_9
; X86-NEXT: .LBB9_11: # %select.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: jmp .LBB9_10
; X86-NEXT: .LBB9_2:
-; X86-NEXT: rep bsfl %edx, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %edx, %ebx
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB9_5
; X86-NEXT: .LBB9_6: # %select.true.sink
; X86-NEXT: rep bsfl %esi, %esi
@@ -409,13 +420,14 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
; X86-NEXT: jne .LBB9_9
; X86-NEXT: .LBB9_8: # %select.true.sink
; X86-NEXT: addl $64, %esi
-; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %esi, %ebx
; X86-NEXT: .LBB9_9: # %select.true.sink
-; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ebx, (%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: .LBB9_10: # %select.true.sink
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bsr.ll b/llvm/test/CodeGen/X86/bsr.ll
index fbca4af425eac..ab0478a4e944b 100644
--- a/llvm/test/CodeGen/X86/bsr.ll
+++ b/llvm/test/CodeGen/X86/bsr.ll
@@ -291,79 +291,80 @@ define i128 @cmov_bsr128(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsr128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %ebx, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %eax, %edx
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: testl %ebp, %ebp
+; X86-NEXT: testl %esi, %esi
; X86-NEXT: jne .LBB8_3
; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: bsrl %ebx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: orl $32, %edx
+; X86-NEXT: bsrl %ebx, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: je .LBB8_7
; X86-NEXT: .LBB8_6:
-; X86-NEXT: bsrl %edi, %esi
-; X86-NEXT: xorl $31, %esi
+; X86-NEXT: bsrl %edi, %eax
+; X86-NEXT: xorl $31, %eax
; X86-NEXT: jmp .LBB8_8
; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
-; X86-NEXT: movl $128, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl $128, %esi
; X86-NEXT: jmp .LBB8_11
; X86-NEXT: .LBB8_3:
-; X86-NEXT: bsrl %ebp, %edx
-; X86-NEXT: xorl $31, %edx
+; X86-NEXT: bsrl %esi, %esi
+; X86-NEXT: xorl $31, %esi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB8_6
; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: bsrl %ecx, %esi
-; X86-NEXT: xorl $31, %esi
-; X86-NEXT: orl $32, %esi
+; X86-NEXT: bsrl %ecx, %eax
+; X86-NEXT: xorl $31, %eax
+; X86-NEXT: orl $32, %eax
; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: orl %ebp, %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: orl 36(%ebp), %edx
; X86-NEXT: jne .LBB8_10
; X86-NEXT: # %bb.9: # %cond.false
-; X86-NEXT: orl $64, %esi
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: orl %ebp, %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl 36(%ebp), %edi
; X86-NEXT: orl %ecx, %edi
; X86-NEXT: je .LBB8_12
; X86-NEXT: # %bb.13: # %cond.end
-; X86-NEXT: xorl $127, %edx
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: jmp .LBB8_14
; X86-NEXT: .LBB8_12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: movl 48(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %esi
; X86-NEXT: .LBB8_14: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -398,62 +399,67 @@ define i128 @cmov_bsr128_undef(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsr128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2:
-; X86-NEXT: bsrl %esi, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
+; X86-NEXT: bsrl %edi, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1:
-; X86-NEXT: bsrl %edi, %ecx
-; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: bsrl %eax, %esi
+; X86-NEXT: xorl $31, %esi
; X86-NEXT: .LBB9_3:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
; X86-NEXT: testl %edx, %edx
; X86-NEXT: jne .LBB9_4
; X86-NEXT: # %bb.5:
-; X86-NEXT: bsrl %ebx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: orl $32, %ebp
-; X86-NEXT: jmp .LBB9_6
+; X86-NEXT: bsrl %ebx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl $32, %ecx
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: je .LBB9_7
+; X86-NEXT: jmp .LBB9_8
; X86-NEXT: .LBB9_4:
-; X86-NEXT: bsrl %edx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: .LBB9_6:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %edi, %esi
+; X86-NEXT: bsrl %edx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl %eax, %edi
; X86-NEXT: jne .LBB9_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: orl $64, %ebp
-; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: .LBB9_7:
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: movl %ecx, %esi
; X86-NEXT: .LBB9_8:
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl 32(%ebp), %ebx
; X86-NEXT: orl %edx, %ebx
; X86-NEXT: jne .LBB9_9
; X86-NEXT: # %bb.10:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %ecx
; X86-NEXT: jmp .LBB9_11
; X86-NEXT: .LBB9_9:
-; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: .LBB9_11:
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll
index 6d5e995a6d574..673b7f16de75c 100644
--- a/llvm/test/CodeGen/X86/bswap-wide-int.ll
+++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll
@@ -41,13 +41,16 @@ define i64 @bswap_i64(i64 %a0) nounwind {
define i128 @bswap_i128(i128 %a0) nounwind {
; X86-LABEL: bswap_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: bswapl %edi
; X86-NEXT: bswapl %esi
; X86-NEXT: bswapl %edx
@@ -56,25 +59,32 @@ define i128 @bswap_i128(i128 %a0) nounwind {
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X86-MOVBE-LABEL: bswap_i128:
; X86-MOVBE: # %bb.0:
+; X86-MOVBE-NEXT: pushl %ebp
+; X86-MOVBE-NEXT: movl %esp, %ebp
; X86-MOVBE-NEXT: pushl %edi
; X86-MOVBE-NEXT: pushl %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-MOVBE-NEXT: andl $-16, %esp
+; X86-MOVBE-NEXT: movl 8(%ebp), %eax
+; X86-MOVBE-NEXT: movl 32(%ebp), %ecx
+; X86-MOVBE-NEXT: movl 36(%ebp), %edx
+; X86-MOVBE-NEXT: movl 24(%ebp), %esi
+; X86-MOVBE-NEXT: movl 28(%ebp), %edi
; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
; X86-MOVBE-NEXT: movbel %edx, (%eax)
+; X86-MOVBE-NEXT: leal -8(%ebp), %esp
; X86-MOVBE-NEXT: popl %esi
; X86-MOVBE-NEXT: popl %edi
+; X86-MOVBE-NEXT: popl %ebp
; X86-MOVBE-NEXT: retl $4
;
; X64-LABEL: bswap_i128:
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index d869f8ec01a5a..661e7bb19641c 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -152,17 +152,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $176, %esp
-; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edx
; X86-NEXT: xorl %eax, %edx
-; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -172,16 +172,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %ecx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ebx
; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: subl %edx, %edi
; X86-NEXT: sbbl %edx, %ebx
@@ -488,13 +488,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 56(%ebp), %ecx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: movl %eax, 4(%ecx)
; X86-NEXT: movl %ebx, 8(%ecx)
; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %ecx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -508,7 +508,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -523,17 +523,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: imull %edx, %ebx
; X86-NEXT: mull %edx
@@ -543,13 +543,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: adcl %edi, %ebx
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, (%eax)
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 7bbddefd82721..53a3c0c98b1e3 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -152,26 +152,26 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $160, %esp
-; X86-NEXT: movl 28(%ebp), %ebx
-; X86-NEXT: movl 40(%ebp), %esi
-; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: orl 36(%ebp), %ecx
+; X86-NEXT: orl 48(%ebp), %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %cl
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: orl 24(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: orl 20(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: orl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: orl 32(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: sete %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: bsrl %esi, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl 36(%ebp), %ecx
+; X86-NEXT: bsrl 48(%ebp), %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: testl %esi, %esi
@@ -184,28 +184,28 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: addl $64, %eax
-; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 48(%ebp), %edx
; X86-NEXT: orl %esi, %edx
; X86-NEXT: cmovnel %ecx, %eax
-; X86-NEXT: movl 24(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
; X86-NEXT: bsrl %ebx, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %ecx
; X86-NEXT: bsrl %ecx, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edi
; X86-NEXT: bsrl %edi, %esi
; X86-NEXT: xorl $31, %esi
-; X86-NEXT: bsrl 12(%ebp), %edx
+; X86-NEXT: bsrl 24(%ebp), %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: addl $32, %edx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %esi, %edx
; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: cmovnel %ecx, %edx
@@ -237,30 +237,30 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: testb %cl, %cl
; X86-NEXT: movb %cl, %ah
-; X86-NEXT: movl 24(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
; X86-NEXT: movl $0, %esi
; X86-NEXT: cmovnel %esi, %ebx
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: cmovnel %esi, %ecx
; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: cmovnel %edx, %esi
-; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: orb %ah, %al
-; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 56(%ebp), %eax
; X86-NEXT: jne .LBB4_7
; X86-NEXT: # %bb.1: # %udiv-bb1
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %edx
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, %ecx
@@ -289,7 +289,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl 20(%ebp), %ebx
+; X86-NEXT: movl 32(%ebp), %ebx
; X86-NEXT: jae .LBB4_2
; X86-NEXT: # %bb.5:
; X86-NEXT: xorl %edx, %edx
@@ -299,13 +299,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: .LBB4_2: # %udiv-preheader
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edx
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -334,16 +334,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrdl %cl, %eax, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %eax
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 52(%ebp), %eax
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: xorl %eax, %eax
@@ -391,13 +391,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: andl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 40(%ebp), %eax
+; X86-NEXT: andl 52(%ebp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 36(%ebp), %eax
+; X86-NEXT: andl 48(%ebp), %eax
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: andl 32(%ebp), %edx
-; X86-NEXT: andl 28(%ebp), %ecx
+; X86-NEXT: andl 44(%ebp), %edx
+; X86-NEXT: andl 40(%ebp), %ecx
; X86-NEXT: subl %ecx, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -437,7 +437,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: orl %eax, %esi
; X86-NEXT: addl %ecx, %ecx
; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 56(%ebp), %eax
; X86-NEXT: .LBB4_7: # %udiv-end
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
@@ -446,23 +446,23 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: movl %esi, %edx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull %edx, %esi
; X86-NEXT: mull %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl 40(%ebp), %edi
+; X86-NEXT: movl 52(%ebp), %edi
; X86-NEXT: imull %ecx, %edi
; X86-NEXT: addl %edx, %edi
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: imull 28(%ebp), %ebx
+; X86-NEXT: imull 40(%ebp), %ebx
; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edx
; X86-NEXT: imull %edx, %esi
; X86-NEXT: addl %ebx, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -471,7 +471,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -483,26 +483,26 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull 32(%ebp)
-; X86-NEXT: movl 16(%ebp), %esi
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: setb %cl
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull 32(%ebp)
+; X86-NEXT: mull 44(%ebp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 12(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ebx, (%eax)
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index 707b05f3478db..bb5640aeb66fa 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -481,18 +481,21 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixtfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -501,7 +504,7 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -620,18 +623,21 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixunstfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -640,7 +646,7 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -818,18 +824,21 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floattitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -838,7 +847,7 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -1016,18 +1025,21 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floatuntitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1036,7 +1048,7 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 1de2484d47ba1..6d4ec063ccd46 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -415,16 +415,20 @@ define dso_local void @TestFPToSIF128_I128() nounwind {
; X86-LABEL: TestFPToSIF128_I128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vf128, %eax
+; X86-NEXT: movl vf128+4, %ecx
+; X86-NEXT: movl vf128+8, %edx
+; X86-NEXT: movl vf128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixtfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -432,7 +436,7 @@ define dso_local void @TestFPToSIF128_I128() nounwind {
; X86-NEXT: movl %edx, vi128+8
; X86-NEXT: movl %ecx, vi128+4
; X86-NEXT: movl %eax, vi128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -466,16 +470,20 @@ define dso_local void @TestFPToUIF128_U128() nounwind {
; X86-LABEL: TestFPToUIF128_U128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vf128, %eax
+; X86-NEXT: movl vf128+4, %ecx
+; X86-NEXT: movl vf128+8, %edx
+; X86-NEXT: movl vf128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixunstfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -483,7 +491,7 @@ define dso_local void @TestFPToUIF128_U128() nounwind {
; X86-NEXT: movl %edx, vu128+8
; X86-NEXT: movl %ecx, vu128+4
; X86-NEXT: movl %eax, vu128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -913,16 +921,20 @@ define dso_local void @TestSIToFPI128_F128() nounwind {
; X86-LABEL: TestSIToFPI128_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vi128, %eax
+; X86-NEXT: movl vi128+4, %ecx
+; X86-NEXT: movl vi128+8, %edx
+; X86-NEXT: movl vi128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vi128+12
-; X86-NEXT: pushl vi128+8
-; X86-NEXT: pushl vi128+4
-; X86-NEXT: pushl vi128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floattitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -930,7 +942,7 @@ define dso_local void @TestSIToFPI128_F128() nounwind {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -964,16 +976,20 @@ define dso_local void @TestUIToFPU128_F128() #2 {
; X86-LABEL: TestUIToFPU128_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vu128, %eax
+; X86-NEXT: movl vu128+4, %ecx
+; X86-NEXT: movl vu128+8, %edx
+; X86-NEXT: movl vu128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vu128+12
-; X86-NEXT: pushl vu128+8
-; X86-NEXT: pushl vu128+4
-; X86-NEXT: pushl vu128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floatuntitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -981,7 +997,7 @@ define dso_local void @TestUIToFPU128_F128() #2 {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -1134,33 +1150,30 @@ define dso_local i32 @TestBits128(fp128 %ld) nounwind {
;
; X86-LABEL: TestBits128:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $72, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: orl (%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sete %al
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $72, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-AVX-LABEL: TestBits128:
@@ -1359,12 +1372,14 @@ define i1 @PR34866(i128 %x) nounwind {
;
; X86-LABEL: PR34866:
; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-AVX-LABEL: PR34866:
@@ -1394,12 +1409,14 @@ define i1 @PR34866_commute(i128 %x) nounwind {
;
; X86-LABEL: PR34866_commute:
; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-AVX-LABEL: PR34866_commute:
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index a7eea04181f60..ad2d690fd7ed0 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -41,27 +41,40 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: add:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: add:
@@ -81,24 +94,32 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -107,9 +128,10 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -141,27 +163,40 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: sub:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: sub:
@@ -181,24 +216,32 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -207,9 +250,10 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -241,27 +285,40 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: mul:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: mul:
@@ -281,24 +338,32 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -307,9 +372,10 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -341,27 +407,40 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: div:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: div:
@@ -381,24 +460,32 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -407,9 +494,10 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -434,31 +522,48 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
;
; X86-LABEL: fma:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $92, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaf128
-; X86-NEXT: addl $60, %esp
-; X86-NEXT: movaps (%esp), %xmm0
-; X86-NEXT: movaps %xmm0, (%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%ebp)
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl $92, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: fma:
@@ -481,28 +586,40 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $96, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 56(%ebp)
-; WIN-X86-NEXT: pushl 52(%ebp)
-; WIN-X86-NEXT: pushl 48(%ebp)
-; WIN-X86-NEXT: pushl 44(%ebp)
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 52(%ebp), %ebx
+; WIN-X86-NEXT: movl 56(%ebp), %edi
+; WIN-X86-NEXT: movl 60(%ebp), %edx
+; WIN-X86-NEXT: movl 64(%ebp), %ecx
+; WIN-X86-NEXT: movl 68(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 48(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 44(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 40(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmal
-; WIN-X86-NEXT: addl $52, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -511,9 +628,10 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -538,27 +656,40 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: frem:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: frem:
@@ -578,24 +709,32 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -604,9 +743,10 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -631,23 +771,28 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
;
; X86-LABEL: ceil:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll ceilf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: ceil:
@@ -667,17 +812,20 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _ceill
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -713,23 +861,28 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
;
; X86-LABEL: acos:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll acosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: acos:
@@ -749,17 +902,20 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _acosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -795,23 +951,28 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
;
; X86-LABEL: cos:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll cosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: cos:
@@ -831,17 +992,20 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _cosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -877,23 +1041,28 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: cosh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll coshf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: cosh:
@@ -913,17 +1082,20 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _coshl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -959,23 +1131,28 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll expf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: exp:
@@ -995,17 +1172,20 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _expl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1041,23 +1221,28 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll exp2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: exp2:
@@ -1077,17 +1262,20 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _exp2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1123,23 +1311,28 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
;
; X86-LABEL: floor:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll floorf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: floor:
@@ -1159,17 +1352,20 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _floorl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1205,23 +1401,28 @@ define fp128 @log(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll logf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log:
@@ -1241,17 +1442,20 @@ define fp128 @log(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _logl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1287,23 +1491,28 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log10:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll log10f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log10:
@@ -1323,17 +1532,20 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _log10l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1369,23 +1581,28 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll log2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log2:
@@ -1405,17 +1622,20 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _log2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1451,27 +1671,40 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: maxnum:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaxf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: maxnum:
@@ -1491,24 +1724,32 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmaxl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1517,9 +1758,10 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1544,27 +1786,40 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: minnum:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fminf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: minnum:
@@ -1584,24 +1839,32 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fminl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1610,9 +1873,10 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1637,23 +1901,28 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: nearbyint:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll nearbyintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: nearbyint:
@@ -1673,17 +1942,20 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _nearbyintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1719,27 +1991,40 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: pow:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll powf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: pow:
@@ -1759,24 +2044,32 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _powl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1785,9 +2078,10 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1819,24 +2113,32 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
;
; X86-LABEL: powi:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $64, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __powitf2
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $64, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
; WIN-LABEL: powi:
@@ -1853,21 +2155,26 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___powitf2
-; WIN-X86-NEXT: addl $24, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1876,9 +2183,10 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1903,23 +2211,28 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: rint:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll rintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: rint:
@@ -1939,17 +2252,20 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _rintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1985,23 +2301,28 @@ define fp128 @round(fp128 %x) nounwind strictfp {
;
; X86-LABEL: round:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: round:
@@ -2021,17 +2342,20 @@ define fp128 @round(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2067,23 +2391,28 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
;
; X86-LABEL: roundeven:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundevenf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: roundeven:
@@ -2103,17 +2432,20 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundevenl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2149,23 +2481,28 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
;
; X86-LABEL: asin:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll asinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: asin:
@@ -2185,17 +2522,20 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _asinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2231,23 +2571,28 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sin:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sin:
@@ -2267,17 +2612,20 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2313,23 +2661,28 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sinh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sinh:
@@ -2349,17 +2702,20 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2395,23 +2751,28 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sqrt:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sqrtf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sqrt:
@@ -2431,17 +2792,20 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sqrtl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2477,23 +2841,28 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
;
; X86-LABEL: atan:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: atan:
@@ -2513,17 +2882,20 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2559,27 +2931,40 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: atan2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atan2f128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: atan2:
@@ -2599,24 +2984,32 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atan2l
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2625,9 +3018,10 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -2652,23 +3046,28 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
;
; X86-LABEL: tan:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: tan:
@@ -2688,17 +3087,20 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2734,23 +3136,28 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: tanh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: tanh:
@@ -2770,17 +3177,20 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2816,23 +3226,28 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
;
; X86-LABEL: trunc:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll truncf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: trunc:
@@ -2852,17 +3267,20 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _truncl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2919,12 +3337,18 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: lrint:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _lrintl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -2969,12 +3393,18 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: llrint:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _llrintl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -3019,12 +3449,18 @@ define i32 @lround(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: lround:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _lroundl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -3069,12 +3505,18 @@ define i64 @llround(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: llround:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _llroundl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -3176,26 +3618,32 @@ define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
;
; WIN-X86-LABEL: cmp:
; WIN-X86: # %bb.0:
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
+; WIN-X86-NEXT: pushl 36(%ebp)
+; WIN-X86-NEXT: pushl 32(%ebp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: je LBB37_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB37_3
; WIN-X86-NEXT: LBB37_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB37_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmp.f128(
fp128 %x, fp128 %y,
@@ -3300,26 +3748,32 @@ define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
;
; WIN-X86-LABEL: cmps:
; WIN-X86: # %bb.0:
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
+; WIN-X86-NEXT: pushl 36(%ebp)
+; WIN-X86-NEXT: pushl 32(%ebp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: je LBB38_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB38_3
; WIN-X86-NEXT: LBB38_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB38_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmps.f128(
fp128 %x, fp128 %y,
@@ -3496,44 +3950,47 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-LABEL: cmp_ueq_q:
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: movl 32(%ebp), %edi
+; WIN-X86-NEXT: movl 36(%ebp), %esi
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: sete %bl
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: orb %bl, %al
; WIN-X86-NEXT: jne LBB39_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB39_3
; WIN-X86-NEXT: LBB39_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB39_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
; WIN-X86-NEXT: popl %ebx
@@ -3716,32 +4173,34 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-LABEL: cmp_one_q:
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: movl 32(%ebp), %edi
+; WIN-X86-NEXT: movl 36(%ebp), %esi
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: setne %bl
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
@@ -3749,13 +4208,14 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-NEXT: testb %bl, %al
; WIN-X86-NEXT: jne LBB40_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB40_3
; WIN-X86-NEXT: LBB40_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB40_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
; WIN-X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index f727a79078627..4b0449fd7502e 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -42,22 +42,38 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Add:
@@ -78,22 +94,31 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -101,8 +126,10 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -144,22 +171,38 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Add:
@@ -180,22 +223,31 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -203,8 +255,10 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -241,22 +295,38 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sub:
@@ -277,22 +347,31 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -300,8 +379,10 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -343,22 +424,38 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Sub:
@@ -379,22 +476,31 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -402,8 +508,10 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -440,22 +548,38 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Mul:
@@ -476,22 +600,31 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -499,8 +632,10 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -542,22 +677,38 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Mul:
@@ -578,22 +729,31 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -601,8 +761,10 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -639,22 +801,38 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Div:
@@ -675,22 +853,31 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -698,8 +885,10 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -741,22 +930,38 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Div:
@@ -777,22 +982,31 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -800,8 +1014,10 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -830,22 +1046,38 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Rem:
@@ -866,22 +1098,31 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -889,8 +1130,10 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -922,22 +1165,38 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Rem:
@@ -958,22 +1217,31 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -981,8 +1249,10 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1011,18 +1281,24 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sqrt:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sqrtf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sqrt:
@@ -1042,16 +1318,19 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sqrtl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1089,18 +1368,24 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sin:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sin:
@@ -1120,16 +1405,19 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1167,18 +1455,24 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Cos:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll cosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Cos:
@@ -1198,16 +1492,19 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _cosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1245,18 +1542,24 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Ceil:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll ceilf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Ceil:
@@ -1276,16 +1579,19 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _ceill
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1323,18 +1629,24 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Floor:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll floorf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Floor:
@@ -1354,16 +1666,19 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _floorl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1401,18 +1716,24 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Trunc:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll truncf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Trunc:
@@ -1432,16 +1753,19 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _truncl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1479,18 +1803,24 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Nearbyint:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll nearbyintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Nearbyint:
@@ -1510,16 +1840,19 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _nearbyintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1557,18 +1890,24 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Rint:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll rintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Rint:
@@ -1588,16 +1927,19 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _rintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1635,18 +1977,24 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Round:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Round:
@@ -1666,16 +2014,19 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1705,31 +2056,48 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
;
; X86-LABEL: Test128FMA:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $92, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaf128
-; X86-NEXT: addl $60, %esp
-; X86-NEXT: movaps (%esp), %xmm0
-; X86-NEXT: movaps %xmm0, (%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%ebp)
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl $92, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128FMA:
@@ -1752,28 +2120,40 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $96, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 56(%ebp)
-; WIN-X86-NEXT: pushl 52(%ebp)
-; WIN-X86-NEXT: pushl 48(%ebp)
-; WIN-X86-NEXT: pushl 44(%ebp)
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 52(%ebp), %ebx
+; WIN-X86-NEXT: movl 56(%ebp), %edi
+; WIN-X86-NEXT: movl 60(%ebp), %edx
+; WIN-X86-NEXT: movl 64(%ebp), %ecx
+; WIN-X86-NEXT: movl 68(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 48(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 44(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 40(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmal
-; WIN-X86-NEXT: addl $52, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1782,9 +2162,10 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1804,23 +2185,28 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
;
; X86-LABEL: Test128Acos:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll acosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Acos:
@@ -1840,17 +2226,20 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _acosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1879,23 +2268,28 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
;
; X86-LABEL: Test128Asin:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll asinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Asin:
@@ -1915,17 +2309,20 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _asinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1954,23 +2351,28 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
;
; X86-LABEL: Test128Atan:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Atan:
@@ -1990,17 +2392,20 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2029,27 +2434,40 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
;
; X86-LABEL: Test128Atan2:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atan2f128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Atan2:
@@ -2069,24 +2487,32 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atan2l
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2095,9 +2521,10 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b)
@@ -2115,23 +2542,28 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Cosh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll coshf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Cosh:
@@ -2151,17 +2583,20 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _coshl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2190,23 +2625,28 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Sinh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Sinh:
@@ -2226,17 +2666,20 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2265,23 +2708,28 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
;
; X86-LABEL: Test128Tan:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Tan:
@@ -2301,17 +2749,20 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2340,23 +2791,28 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Tanh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Tanh:
@@ -2376,17 +2832,20 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2425,27 +2884,34 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
;
; X86-LABEL: Test128Modf:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll modff128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm1
; X86-NEXT: movaps %xmm1, 16(%esi)
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $40, %esp
+; X86-NEXT: addl $80, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Modf:
@@ -2468,18 +2934,21 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: subl $112, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; WIN-X86-NEXT: pushl %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %ecx
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _modfl
-; WIN-X86-NEXT: addl $24, %esp
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index e8c8ccfa8d37f..ec1b8a3c8d6d9 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -264,53 +264,62 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: movl %esp, %ebp
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $-16, %esp
+; X86-FAST-NEXT: subl $16, %esp
+; X86-FAST-NEXT: movl 24(%ebp), %edi
+; X86-FAST-NEXT: movl 28(%ebp), %edx
+; X86-FAST-NEXT: movl 48(%ebp), %esi
+; X86-FAST-NEXT: movl 56(%ebp), %ecx
; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: movl 52(%ebp), %eax
; X86-FAST-NEXT: jne .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %ebx, %ebp
; X86-FAST-NEXT: movl %esi, %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl %edi, %eax
-; X86-FAST-NEXT: movl %edx, %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl %edi, %esi
+; X86-FAST-NEXT: movl 32(%ebp), %edi
+; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: movl 36(%ebp), %edx
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %esi, %edx
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: movl %ebx, %edi
-; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %eax
+; X86-FAST-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-FAST-NEXT: jmp .LBB6_6
; X86-FAST-NEXT: .LBB6_1:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl 44(%ebp), %ebx
+; X86-FAST-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl 40(%ebp), %ebx
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB6_4
; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: movl %eax, %ebp
+; X86-FAST-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-FAST-NEXT: .LBB6_6:
-; X86-FAST-NEXT: movl %ebx, %eax
-; X86-FAST-NEXT: shldl %cl, %ebp, %eax
-; X86-FAST-NEXT: movl %edi, %ebp
-; X86-FAST-NEXT: shldl %cl, %ebx, %ebp
-; X86-FAST-NEXT: movl %esi, %ebx
-; X86-FAST-NEXT: shldl %cl, %edi, %ebx
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: shldl %cl, %ebx, %edi
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: shldl %cl, %esi, %ebx
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-FAST-NEXT: movl %eax, %esi
+; X86-FAST-NEXT: shldl %cl, %edx, %esi
; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-FAST-NEXT: shldl %cl, %esi, %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl %edx, 12(%ecx)
-; X86-FAST-NEXT: movl %ebx, 8(%ecx)
-; X86-FAST-NEXT: movl %ebp, 4(%ecx)
-; X86-FAST-NEXT: movl %eax, (%ecx)
-; X86-FAST-NEXT: movl %ecx, %eax
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-FAST-NEXT: shldl %cl, %eax, %edx
+; X86-FAST-NEXT: movl 8(%ebp), %eax
+; X86-FAST-NEXT: movl %edx, 12(%eax)
+; X86-FAST-NEXT: movl %esi, 8(%eax)
+; X86-FAST-NEXT: movl %ebx, 4(%eax)
+; X86-FAST-NEXT: movl %edi, (%eax)
+; X86-FAST-NEXT: leal -12(%ebp), %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -320,77 +329,91 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: movl %esp, %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testb $64, %al
+; X86-SLOW-NEXT: andl $-16, %esp
+; X86-SLOW-NEXT: subl $32, %esp
+; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 28(%ebp), %eax
+; X86-SLOW-NEXT: movl 48(%ebp), %edx
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: movl 52(%ebp), %edi
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %edx, %ebp
-; X86-SLOW-NEXT: movl %ebx, %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %esi
; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: testb $32, %al
-; X86-SLOW-NEXT: je .LBB6_5
-; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edi, %ebx
-; X86-SLOW-NEXT: movl %edx, %edi
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl 36(%ebp), %eax
+; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: .LBB6_3:
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: jne .LBB6_4
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %ecx, %ebp
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: movl %edx, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: shrl %ebp
-; X86-SLOW-NEXT: movb %al, %ch
-; X86-SLOW-NEXT: notb %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: orl %esi, %ebp
-; X86-SLOW-NEXT: movl %edi, %esi
-; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %esi, %edx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movb %al, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: movl %ebx, %edi
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: notb %bl
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %edx
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: orl %eax, %edx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-SLOW-NEXT: shrl %cl, %ebx
; X86-SLOW-NEXT: orl %eax, %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %esi, 8(%eax)
; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %ebp, (%eax)
-; X86-SLOW-NEXT: addl $4, %esp
+; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 4340f8fd484ae..544ab7fc77374 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -258,51 +258,53 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: movl %esp, %ebp
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: pushl %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $-16, %esp
+; X86-FAST-NEXT: subl $16, %esp
+; X86-FAST-NEXT: movl 24(%ebp), %esi
+; X86-FAST-NEXT: movl 28(%ebp), %eax
+; X86-FAST-NEXT: movl 48(%ebp), %edx
+; X86-FAST-NEXT: movl 56(%ebp), %ecx
; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: movl 52(%ebp), %ebx
; X86-FAST-NEXT: je .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %esi, %ebp
-; X86-FAST-NEXT: movl %ebx, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: movl 32(%ebp), %esi
+; X86-FAST-NEXT: movl %ebx, %edi
+; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: movl 36(%ebp), %eax
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_4
; X86-FAST-NEXT: jmp .LBB6_5
; X86-FAST-NEXT: .LBB6_1:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl 40(%ebp), %edi
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl 44(%ebp), %edi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %edi, %ebx
-; X86-FAST-NEXT: movl %esi, %edi
-; X86-FAST-NEXT: movl %edx, %esi
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X86-FAST-NEXT: movl %esi, %eax
+; X86-FAST-NEXT: movl %ebx, %esi
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: movl %edi, %edx
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: shrdl %cl, %edx, %ebp
-; X86-FAST-NEXT: shrdl %cl, %esi, %edx
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, %edi
+; X86-FAST-NEXT: shrdl %cl, %ebx, %edx
+; X86-FAST-NEXT: shrdl %cl, %esi, %ebx
; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-FAST-NEXT: shrdl %cl, %ebx, %edi
-; X86-FAST-NEXT: movl %edi, 12(%eax)
-; X86-FAST-NEXT: movl %esi, 8(%eax)
+; X86-FAST-NEXT: shrdl %cl, %eax, %esi
+; X86-FAST-NEXT: movl 8(%ebp), %eax
+; X86-FAST-NEXT: movl %esi, 12(%eax)
+; X86-FAST-NEXT: movl %ebx, 8(%eax)
; X86-FAST-NEXT: movl %edx, 4(%eax)
-; X86-FAST-NEXT: movl %ebp, (%eax)
-; X86-FAST-NEXT: addl $4, %esp
+; X86-FAST-NEXT: movl %edi, (%eax)
+; X86-FAST-NEXT: leal -12(%ebp), %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -312,78 +314,88 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: movl %esp, %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: andl $-16, %esp
+; X86-SLOW-NEXT: subl $16, %esp
+; X86-SLOW-NEXT: movl 24(%ebp), %edx
+; X86-SLOW-NEXT: movl 28(%ebp), %esi
+; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 56(%ebp), %eax
+; X86-SLOW-NEXT: testb $64, %al
+; X86-SLOW-NEXT: movl 52(%ebp), %edi
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movl %ebx, %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl 32(%ebp), %edx
+; X86-SLOW-NEXT: movl %edi, %eax
; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: testb $32, %cl
-; X86-SLOW-NEXT: jne .LBB6_5
-; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, %edi
-; X86-SLOW-NEXT: movl %edx, %ebp
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: movl 36(%ebp), %esi
+; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl 40(%ebp), %eax
+; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: .LBB6_3:
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: je .LBB6_4
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: leal (%ebp,%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: orl %edx, %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: notb %al
+; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: addl %ebx, %ebx
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %ebx
+; X86-SLOW-NEXT: orl %edx, %ebx
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: leal (%edi,%edi), %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
+; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl (%esp), %edi # 4-byte Reload
-; X86-SLOW-NEXT: leal (%edi,%edi), %ebp
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %ebx
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl 8(%ebp), %ecx
; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebp, 8(%ecx)
+; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
; X86-SLOW-NEXT: movl %edx, 4(%ecx)
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, (%ecx)
; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: addl $8, %esp
+; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index a464d78f9af38..df97f49440f74 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -74,43 +74,57 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 48(%ebp), %edi
+; X86-SSE2-NEXT: movl 52(%ebp), %eax
+; X86-SSE2-NEXT: movl 24(%ebp), %edx
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: testb $64, %cl
-; X86-SSE2-NEXT: movl %esi, %eax
-; X86-SSE2-NEXT: cmovnel %ebx, %eax
-; X86-SSE2-NEXT: movl %edx, %ebp
-; X86-SSE2-NEXT: cmovnel %edi, %ebp
-; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi
-; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx
-; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: movl %edx, %ecx
+; X86-SSE2-NEXT: cmovnel %edi, %ecx
+; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl 28(%ebp), %esi
+; X86-SSE2-NEXT: movl %esi, %ebx
+; X86-SSE2-NEXT: cmovnel %eax, %ebx
+; X86-SSE2-NEXT: cmovnel 44(%ebp), %eax
+; X86-SSE2-NEXT: cmovnel 40(%ebp), %edi
+; X86-SSE2-NEXT: cmovel 36(%ebp), %esi
+; X86-SSE2-NEXT: cmovel 32(%ebp), %edx
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: testb $32, %cl
-; X86-SSE2-NEXT: cmovnel %esi, %edx
-; X86-SSE2-NEXT: cmovnel %ebp, %esi
-; X86-SSE2-NEXT: cmovnel %eax, %ebp
-; X86-SSE2-NEXT: cmovel %edi, %ebx
+; X86-SSE2-NEXT: cmovnel %edx, %esi
+; X86-SSE2-NEXT: cmovnel %ebx, %edx
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: cmovnel %ecx, %ebx
; X86-SSE2-NEXT: cmovel %eax, %edi
-; X86-SSE2-NEXT: movl %edi, %eax
-; X86-SSE2-NEXT: shldl %cl, %ebx, %eax
-; X86-SSE2-NEXT: movl %ebp, %ebx
-; X86-SSE2-NEXT: shldl %cl, %edi, %ebx
-; X86-SSE2-NEXT: movl %esi, %edi
-; X86-SSE2-NEXT: shldl %cl, %ebp, %edi
+; X86-SSE2-NEXT: cmovel %ecx, %eax
+; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SSE2-NEXT: shldl %cl, %esi, %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movl %edx, 12(%ecx)
-; X86-SSE2-NEXT: movl %edi, 8(%ecx)
-; X86-SSE2-NEXT: movl %ebx, 4(%ecx)
-; X86-SSE2-NEXT: movl %eax, (%ecx)
-; X86-SSE2-NEXT: movl %ecx, %eax
+; X86-SSE2-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SSE2-NEXT: movl %ebx, %edi
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
+; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SSE2-NEXT: shldl %cl, %eax, %edi
+; X86-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl %edx, %edi
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
+; X86-SSE2-NEXT: shldl %cl, %ebx, %edi
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
+; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SSE2-NEXT: shldl %cl, %edx, %esi
+; X86-SSE2-NEXT: movl %esi, 12(%eax)
+; X86-SSE2-NEXT: movl %edi, 8(%eax)
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: movl %ecx, 4(%eax)
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: movl %ecx, (%eax)
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll
index 2849e448a0534..b4546c1e983c4 100644
--- a/llvm/test/CodeGen/X86/i128-add.ll
+++ b/llvm/test/CodeGen/X86/i128-add.ll
@@ -5,17 +5,20 @@
define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-LABEL: add_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: addl 40(%ebp), %esi
+; X86-NEXT: adcl 44(%ebp), %edi
+; X86-NEXT: adcl 48(%ebp), %ecx
+; X86-NEXT: adcl 52(%ebp), %edx
; X86-NEXT: addl $1, %esi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ecx
@@ -24,8 +27,10 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: add_i128:
diff --git a/llvm/test/CodeGen/X86/i128-sdiv.ll b/llvm/test/CodeGen/X86/i128-sdiv.ll
index 717f52f198ee8..f8a919c0400f8 100644
--- a/llvm/test/CodeGen/X86/i128-sdiv.ll
+++ b/llvm/test/CodeGen/X86/i128-sdiv.ll
@@ -8,18 +8,21 @@
define i128 @test1(i128 %x) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl $30, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl 28(%ebp), %esi
+; X86-NEXT: adcl 32(%ebp), %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: shrdl $2, %ecx, %edx
; X86-NEXT: movl %ecx, %esi
@@ -29,8 +32,10 @@ define i128 @test1(i128 %x) nounwind {
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test1:
@@ -52,38 +57,44 @@ define i128 @test1(i128 %x) nounwind {
define i128 @test2(i128 %x) nounwind {
; X86-LABEL: test2:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: shrl $30, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: shrdl $2, %edx, %ecx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: adcl 28(%ebp), %edx
+; X86-NEXT: adcl 32(%ebp), %ecx
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: shrdl $2, %eax, %ecx
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: sarl $2, %edx
-; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: sarl $2, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test2:
@@ -107,8 +118,317 @@ define i128 @test2(i128 %x) nounwind {
define i128 @test3(i128 %x) nounwind {
; X86-LABEL: test3:
-; X86 doesn't have __divti3, so the urem is expanded into a loop.
-; X86: udiv-do-while
+; X86: # %bb.0: # %_udiv-special-cases
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $160, %esp
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %ebx
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: subl %edi, %ecx
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: jne .LBB2_1
+; X86-NEXT: # %bb.2: # %_udiv-special-cases
+; X86-NEXT: bsrl %esi, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: addl $32, %ecx
+; X86-NEXT: jmp .LBB2_3
+; X86-NEXT: .LBB2_1:
+; X86-NEXT: bsrl %ebx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: .LBB2_3: # %_udiv-special-cases
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: jne .LBB2_4
+; X86-NEXT: # %bb.5: # %_udiv-special-cases
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: xorl $31, %edx
+; X86-NEXT: addl $32, %edx
+; X86-NEXT: jmp .LBB2_6
+; X86-NEXT: .LBB2_4:
+; X86-NEXT: bsrl %edx, %edx
+; X86-NEXT: xorl $31, %edx
+; X86-NEXT: .LBB2_6: # %_udiv-special-cases
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ebx, %esi
+; X86-NEXT: jne .LBB2_8
+; X86-NEXT: # %bb.7: # %_udiv-special-cases
+; X86-NEXT: addl $64, %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: .LBB2_8: # %_udiv-special-cases
+; X86-NEXT: movl $61, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: movl $127, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: orb %cl, %al
+; X86-NEXT: testb %al, %al
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jne .LBB2_10
+; X86-NEXT: # %bb.9: # %_udiv-special-cases
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: .LBB2_10: # %_udiv-special-cases
+; X86-NEXT: notl %edi
+; X86-NEXT: jne .LBB2_11
+; X86-NEXT: # %bb.18: # %_udiv-special-cases
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: je .LBB2_19
+; X86-NEXT: # %bb.15: # %udiv-bb1
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: xorb $127, %cl
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrb $3, %dl
+; X86-NEXT: andb $12, %dl
+; X86-NEXT: negb %dl
+; X86-NEXT: movsbl %dl, %edx
+; X86-NEXT: movl 136(%esp,%edx), %esi
+; X86-NEXT: movl 140(%esp,%edx), %ebx
+; X86-NEXT: shldl %cl, %esi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 128(%esp,%edx), %ebx
+; X86-NEXT: movl 132(%esp,%edx), %edx
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: addl $1, %eax
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jae .LBB2_12
+; X86-NEXT: # %bb.16:
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB2_17
+; X86-NEXT: .LBB2_11:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB2_19
+; X86-NEXT: .LBB2_12: # %udiv-preheader
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%esp,%eax), %edx
+; X86-NEXT: movl 88(%esp,%eax), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrdl %cl, %edx, %edi
+; X86-NEXT: movl 80(%esp,%eax), %ebx
+; X86-NEXT: movl 84(%esp,%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl %cl, %eax, %esi
+; X86-NEXT: shrl %cl, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl %cl, %eax, %ebx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl $3, %ecx
+; X86-NEXT: addl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $4, %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB2_13: # %udiv-do-while
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shldl $1, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: shldl $1, %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %ebx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl $1, %ecx, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl $4, %edi
+; X86-NEXT: andl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl $3, %ebx
+; X86-NEXT: andl %ebx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: subl %ecx, %ebx
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: addl $-1, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: jne .LBB2_13
+; X86-NEXT: # %bb.14:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: .LBB2_17: # %udiv-loop-exit
+; X86-NEXT: shldl $1, %esi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: addl %ebx, %ebx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: .LBB2_19: # %udiv-end
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %ebx
+; X86-NEXT: subl %edi, %ebx
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
;
; X64-LABEL: test3:
; X64: # %bb.0:
@@ -118,6 +438,7 @@ define i128 @test3(i128 %x) nounwind {
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+; X86 doesn't have __divti3, so the sdiv is expanded into a loop.
%tmp = sdiv i128 %x, -73786976294838206467
ret i128 %tmp
}
diff --git a/llvm/test/CodeGen/X86/i128-udiv.ll b/llvm/test/CodeGen/X86/i128-udiv.ll
index 3f890b7f2443a..e7168ed4f0b5a 100644
--- a/llvm/test/CodeGen/X86/i128-udiv.ll
+++ b/llvm/test/CodeGen/X86/i128-udiv.ll
@@ -8,15 +8,21 @@
define i128 @test1(i128 %x) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %edx
; X86-NEXT: shrdl $2, %edx, %ecx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test1:
@@ -31,8 +37,288 @@ define i128 @test1(i128 %x) nounwind {
define i128 @test2(i128 %x) nounwind {
; X86-LABEL: test2:
-; X86 doesn't have __divti3, so the urem is expanded into a loop.
-; X86: udiv-do-while
+; X86: # %bb.0: # %_udiv-special-cases
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $144, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: jne .LBB1_1
+; X86-NEXT: # %bb.2: # %_udiv-special-cases
+; X86-NEXT: bsrl %edx, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: addl $32, %ebx
+; X86-NEXT: jmp .LBB1_3
+; X86-NEXT: .LBB1_1:
+; X86-NEXT: bsrl %esi, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: .LBB1_3: # %_udiv-special-cases
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: jne .LBB1_4
+; X86-NEXT: # %bb.5: # %_udiv-special-cases
+; X86-NEXT: bsrl %edi, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: addl $32, %ecx
+; X86-NEXT: jmp .LBB1_6
+; X86-NEXT: .LBB1_4:
+; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: .LBB1_6: # %_udiv-special-cases
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: jne .LBB1_8
+; X86-NEXT: # %bb.7: # %_udiv-special-cases
+; X86-NEXT: addl $64, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: .LBB1_8: # %_udiv-special-cases
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: movl $127, %ecx
+; X86-NEXT: cmpl %ebx, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT: orb %cl, %bl
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: sete %al
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: jne .LBB1_10
+; X86-NEXT: # %bb.9: # %_udiv-special-cases
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: .LBB1_10: # %_udiv-special-cases
+; X86-NEXT: orb %al, %bl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: jne .LBB1_17
+; X86-NEXT: # %bb.11: # %udiv-bb1
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: xorb $127, %cl
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrb $3, %dl
+; X86-NEXT: andb $12, %dl
+; X86-NEXT: negb %dl
+; X86-NEXT: movsbl %dl, %edx
+; X86-NEXT: movl 120(%esp,%edx), %edi
+; X86-NEXT: movl 124(%esp,%edx), %eax
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 112(%esp,%edx), %eax
+; X86-NEXT: movl 116(%esp,%edx), %edx
+; X86-NEXT: shldl %cl, %edx, %edi
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl $1, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jae .LBB1_12
+; X86-NEXT: # %bb.15:
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: jmp .LBB1_16
+; X86-NEXT: .LBB1_12: # %udiv-preheader
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: movzbl %al, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 76(%esp,%esi), %eax
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 72(%esp,%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: shrdl %cl, %eax, %esi
+; X86-NEXT: movl 64(%esp,%edi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 68(%esp,%edi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shrdl %cl, %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shrdl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl $-4, %ecx
+; X86-NEXT: addl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB1_13: # %udiv-do-while
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl $1, %ecx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %ecx
+; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl $1, %ecx, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmpl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl $1, %ebx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: andl %edx, %edi
+; X86-NEXT: movl $-4, %edx
+; X86-NEXT: andl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: subl %ecx, %esi
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: addl $-1, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: jne .LBB1_13
+; X86-NEXT: # %bb.14:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: .LBB1_16: # %udiv-loop-exit
+; X86-NEXT: shldl $1, %edi, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: shldl $1, %ecx, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: .LBB1_17: # %udiv-end
+; X86-NEXT: movl %ecx, (%esi)
+; X86-NEXT: movl %edx, 4(%esi)
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %ebx, 12(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
;
; X64-LABEL: test2:
; X64: # %bb.0:
@@ -42,14 +328,310 @@ define i128 @test2(i128 %x) nounwind {
; X64-NEXT: callq __udivti3@PLT
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+; X86 doesn't have __udivti3, so the udiv is expanded into a loop.
%tmp = udiv i128 %x, -73786976294838206464
ret i128 %tmp
}
define i128 @test3(i128 %x) nounwind {
; X86-LABEL: test3:
-; X86 doesn't have __divti3, so the urem is expanded into a loop.
-; X86: udiv-do-while
+; X86: # %bb.0: # %_udiv-special-cases
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $160, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: jne .LBB2_1
+; X86-NEXT: # %bb.2: # %_udiv-special-cases
+; X86-NEXT: bsrl %edx, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: addl $32, %ebx
+; X86-NEXT: jmp .LBB2_3
+; X86-NEXT: .LBB2_1:
+; X86-NEXT: bsrl %esi, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: .LBB2_3: # %_udiv-special-cases
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: jne .LBB2_4
+; X86-NEXT: # %bb.5: # %_udiv-special-cases
+; X86-NEXT: bsrl %edi, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: je .LBB2_7
+; X86-NEXT: jmp .LBB2_8
+; X86-NEXT: .LBB2_4:
+; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: jne .LBB2_8
+; X86-NEXT: .LBB2_7: # %_udiv-special-cases
+; X86-NEXT: addl $64, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: .LBB2_8: # %_udiv-special-cases
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: movl $127, %ecx
+; X86-NEXT: cmpl %ebx, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT: orb %cl, %bl
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: sete %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: jne .LBB2_10
+; X86-NEXT: # %bb.9: # %_udiv-special-cases
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: .LBB2_10: # %_udiv-special-cases
+; X86-NEXT: orb %cl, %bl
+; X86-NEXT: jne .LBB2_17
+; X86-NEXT: # %bb.11: # %udiv-bb1
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: xorb $127, %cl
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrb $3, %dl
+; X86-NEXT: andb $12, %dl
+; X86-NEXT: negb %dl
+; X86-NEXT: movsbl %dl, %edx
+; X86-NEXT: movl 136(%esp,%edx), %ebx
+; X86-NEXT: movl 140(%esp,%edx), %esi
+; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 128(%esp,%edx), %esi
+; X86-NEXT: movl 132(%esp,%edx), %edx
+; X86-NEXT: shldl %cl, %edx, %ebx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl $1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jae .LBB2_12
+; X86-NEXT: # %bb.15:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: jmp .LBB2_16
+; X86-NEXT: .LBB2_12: # %udiv-preheader
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl 92(%esp,%eax), %edi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 88(%esp,%eax), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrdl %cl, %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%esp,%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl 84(%esp,%eax), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shrdl %cl, %esi, %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shrl %cl, %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrdl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-3, %eax
+; X86-NEXT: addl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-5, %eax
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB2_13: # %udiv-do-while
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: shldl $1, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %edx, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: andl %edx, %ebx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: movl $-5, %eax
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl $-3, %eax
+; X86-NEXT: andl %eax, %ecx
+; X86-NEXT: subl %ecx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: addl $-1, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: adcl $-1, %edi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jne .LBB2_13
+; X86-NEXT: # %bb.14:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: .LBB2_16: # %udiv-loop-exit
+; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %esi, %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %edi, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: .LBB2_17: # %udiv-end
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
;
; X64-LABEL: test3:
; X64: # %bb.0:
@@ -59,6 +641,7 @@ define i128 @test3(i128 %x) nounwind {
; X64-NEXT: callq __udivti3@PLT
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+; X86 doesn't have __udivti3, so the udiv is expanded into a loop.
%tmp = udiv i128 %x, -73786976294838206467
ret i128 %tmp
}
diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index 55c318e87a5a0..bdceeefbcfaba 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -123,31 +123,34 @@ define i64 @test_i64(i64 %a) nounwind {
define i128 @test_i128(i128 %a) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test_i128:
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
index c52b3ed6c926d..4a6c1d0ae5deb 100644
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -10,33 +10,39 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %esi
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: addl $1, %edi
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: addl $1, %esi
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: orl %ecx, %ebx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: orl %edx, %ebp
-; X86-NEXT: orl %ecx, %ebp
-; X86-NEXT: shrdl $28, %ebx, %ebp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: shrdl $28, %ebx, %esi
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %exit
-; X86-NEXT: movl %edi, (%eax)
-; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -73,15 +79,21 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: orl 20(%ebp), %ecx
+; X86-NEXT: movl 8(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: sete %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
@@ -98,15 +110,21 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: orl 20(%ebp), %ecx
+; X86-NEXT: movl 8(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: setne %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
@@ -123,13 +141,19 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: shll $17, %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl 8(%ebp), %eax
+; X86-NEXT: orl 12(%ebp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
@@ -146,13 +170,19 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: shll $17, %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl 8(%ebp), %eax
+; X86-NEXT: orl 12(%ebp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: setne %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
@@ -170,13 +200,17 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
@@ -194,9 +228,11 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll
index fc1cc1f65627a..e10e48f9aea08 100644
--- a/llvm/test/CodeGen/X86/mul128.ll
+++ b/llvm/test/CodeGen/X86/mul128.ll
@@ -18,85 +18,80 @@ define i128 @foo(i128 %t, i128 %u) {
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
-; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 28
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %edi
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: imull %ecx, %esi
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %ebp, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %esi, %eax
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: imull %edi, %eax
; X86-NEXT: addl %eax, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: imull %ebp, %esi
+; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull 28(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: mull %edi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: imull %edi, %esi
; X86-NEXT: addl %edx, %esi
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebx
; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl $4
%k = mul i128 %t, %u
ret i128 %k
diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll
index 961205c50d976..724b2dc4c431a 100644
--- a/llvm/test/CodeGen/X86/neg-abs.ll
+++ b/llvm/test/CodeGen/X86/neg-abs.ll
@@ -105,31 +105,35 @@ define i128 @neg_abs_i128(i128 %x) nounwind {
; X86-LABEL: neg_abs_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movl 32(%ebp), %edx
; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: xorl %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %ecx, %ebx
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: subl %ebx, %ebp
; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: subl %edi, %ebx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -259,37 +263,42 @@ define i64 @sub_abs_i64(i64 %x, i64 %y) nounwind {
define i128 @sub_abs_i128(i128 %x, i128 %y) nounwind {
; X86-LABEL: sub_abs_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 32(%ebp), %ecx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: subl %edx, %edi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 40(%ebp), %edx
; X86-NEXT: subl %edi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 48(%ebp), %esi
; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: sub_abs_i128:
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 35c7c0e09f394..3004b8b72fcc5 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -340,84 +340,87 @@ define i64 @cnt64(i64 %x) nounwind readnone {
define i128 @cnt128(i128 %x) nounwind readnone {
; X86-NOSSE-LABEL: cnt128:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %edi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
+; X86-NOSSE-NEXT: movl 32(%ebp), %ecx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %esi
; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %esi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %esi
-; X86-NOSSE-NEXT: addl %edi, %esi
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %edx, %edi
-; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %ecx
; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: addl %esi, %ecx
; X86-NOSSE-NEXT: movl %ecx, %edi
; X86-NOSSE-NEXT: shrl $4, %edi
; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl 28(%ebp), %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
; X86-NOSSE-NEXT: addl %edx, %ecx
-; X86-NOSSE-NEXT: addl %esi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl $4, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: addl %ecx, %edx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: movl %edx, (%eax)
; X86-NOSSE-NEXT: movl $0, 12(%eax)
; X86-NOSSE-NEXT: movl $0, 8(%eax)
; X86-NOSSE-NEXT: movl $0, 4(%eax)
+; X86-NOSSE-NEXT: leal -8(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl $4
;
; X64-BASE-LABEL: cnt128:
@@ -462,20 +465,26 @@ define i128 @cnt128(i128 %x) nounwind readnone {
;
; X86-POPCNT-LABEL: cnt128:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: movl %esi, (%eax)
; X86-POPCNT-NEXT: movl $0, 12(%eax)
; X86-POPCNT-NEXT: movl $0, 8(%eax)
; X86-POPCNT-NEXT: movl $0, 4(%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128:
@@ -522,7 +531,11 @@ define i128 @cnt128(i128 %x) nounwind readnone {
;
; X86-SSE2-LABEL: cnt128:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -564,11 +577,17 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-SSE2-NEXT: movl $0, 12(%eax)
; X86-SSE2-NEXT: movl $0, 8(%eax)
; X86-SSE2-NEXT: movl $0, 4(%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -600,6 +619,8 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-SSSE3-NEXT: movl $0, 12(%eax)
; X86-SSSE3-NEXT: movl $0, 8(%eax)
; X86-SSSE3-NEXT: movl $0, 4(%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
@@ -928,87 +949,92 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-NOSSE-LABEL: cnt128_optsize:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ecx
-; X86-NOSSE-NEXT: shrl %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %edi, %ecx
-; X86-NOSSE-NEXT: subl %ecx, %ebx
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: movl 32(%ebp), %edx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %eax, %esi
; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: movl %esi, %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: addl %edi, %esi
+; X86-NOSSE-NEXT: movl %esi, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %esi, %edi
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: movl $1431655765, %eax # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %eax, %esi
+; X86-NOSSE-NEXT: subl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: shrl $2, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %edx, %ebx
+; X86-NOSSE-NEXT: movl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %edx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: andl %edx, %ebx
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %edi, %edx
+; X86-NOSSE-NEXT: movl 28(%ebp), %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl %eax, %edi
+; X86-NOSSE-NEXT: subl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
; X86-NOSSE-NEXT: shrl $2, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: addl %ebp, %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %ebx, %ebp
+; X86-NOSSE-NEXT: addl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ebx, %edi
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl %edi, %ebx
+; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %esi, %ebx
; X86-NOSSE-NEXT: subl %ebx, %eax
; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
; X86-NOSSE-NEXT: shrl $2, %eax
; X86-NOSSE-NEXT: andl %ecx, %eax
; X86-NOSSE-NEXT: addl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: movl %eax, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %eax, %edi
+; X86-NOSSE-NEXT: andl %eax, %ecx
+; X86-NOSSE-NEXT: imull $16843009, %edi, %eax # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %eax
-; X86-NOSSE-NEXT: andl %ebx, %edi
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %ebp, %eax
-; X86-NOSSE-NEXT: subl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: shrl $2, %esi
-; X86-NOSSE-NEXT: andl %ecx, %esi
-; X86-NOSSE-NEXT: addl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %esi, %ebp
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %esi, %eax
-; X86-NOSSE-NEXT: subl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl %ecx, %edx
-; X86-NOSSE-NEXT: addl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl $4, %eax
-; X86-NOSSE-NEXT: addl %edx, %eax
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ebx, %eax
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
-; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: addl %ecx, %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
-; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: leal -12(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
; X86-NOSSE-NEXT: popl %ebx
@@ -1057,13 +1083,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
;
; X86-POPCNT-LABEL: cnt128_optsize:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: xorl %ecx, %ecx
@@ -1071,7 +1101,9 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_optsize:
@@ -1118,7 +1150,11 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
;
; X86-SSE2-LABEL: cnt128_optsize:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -1161,11 +1197,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-SSE2-NEXT: movl %ecx, 8(%eax)
; X86-SSE2-NEXT: movl %ecx, 4(%eax)
; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128_optsize:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -1198,6 +1240,8 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
@@ -1415,85 +1459,88 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-NOSSE-LABEL: cnt128_pgso:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %edi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
+; X86-NOSSE-NEXT: movl 32(%ebp), %ecx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %esi
; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %esi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %esi
-; X86-NOSSE-NEXT: addl %edi, %esi
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %edx, %edi
-; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %ecx
; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: addl %esi, %ecx
; X86-NOSSE-NEXT: movl %ecx, %edi
; X86-NOSSE-NEXT: shrl $4, %edi
; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl 28(%ebp), %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
; X86-NOSSE-NEXT: addl %edx, %ecx
-; X86-NOSSE-NEXT: addl %esi, %ecx
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: movl %edx, 12(%eax)
-; X86-NOSSE-NEXT: movl %edx, 8(%eax)
-; X86-NOSSE-NEXT: movl %edx, 4(%eax)
-; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl $4, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: addl %ecx, %edx
+; X86-NOSSE-NEXT: xorl %ecx, %ecx
+; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
+; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
+; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
+; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: leal -8(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl $4
;
; X64-BASE-LABEL: cnt128_pgso:
@@ -1538,13 +1585,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
;
; X86-POPCNT-LABEL: cnt128_pgso:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: xorl %ecx, %ecx
@@ -1552,7 +1603,9 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_pgso:
@@ -1599,7 +1652,11 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
;
; X86-SSE2-LABEL: cnt128_pgso:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -1642,11 +1699,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-SSE2-NEXT: movl %ecx, 8(%eax)
; X86-SSE2-NEXT: movl %ecx, 4(%eax)
; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128_pgso:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -1679,6 +1742,8 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
diff --git a/llvm/test/CodeGen/X86/pr46004.ll b/llvm/test/CodeGen/X86/pr46004.ll
index f7c7da089c365..829d6dfceba3d 100644
--- a/llvm/test/CodeGen/X86/pr46004.ll
+++ b/llvm/test/CodeGen/X86/pr46004.ll
@@ -6,7 +6,17 @@
define void @fuzz22357(i128 %a0) {
; X86-LABEL: fuzz22357:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
; X86-NEXT: movb $0, (%eax)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: fuzz22357:
@@ -24,6 +34,15 @@ define void @fuzz22357(i128 %a0) {
define void @fuzz22723(i128 %a0) {
; X86-LABEL: fuzz22723:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: fuzz22723:
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
index 50a967e1c2a1a..ce9723b3a84bc 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -762,11 +762,15 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind {
define i32 @t_to_u32(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_u32:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixunstfsi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_u32:
@@ -797,12 +801,18 @@ define i32 @t_to_u32(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_u32:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixunstfsi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_u32:
@@ -835,12 +845,18 @@ define i32 @t_to_u32(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_u32:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixunstfsi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_u32:
@@ -860,11 +876,15 @@ define i32 @t_to_u32(fp128 %a) nounwind {
define i32 @t_to_s32(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_s32:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixtfsi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_s32:
@@ -895,12 +915,18 @@ define i32 @t_to_s32(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_s32:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixtfsi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_s32:
@@ -933,12 +959,18 @@ define i32 @t_to_s32(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_s32:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixtfsi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_s32:
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index f516db8b30ffe..3287869f2c601 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1417,11 +1417,15 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
define i64 @t_to_u64(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_u64:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixunstfdi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_u64:
@@ -1452,12 +1456,18 @@ define i64 @t_to_u64(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_u64:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixunstfdi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_u64:
@@ -1490,12 +1500,18 @@ define i64 @t_to_u64(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_u64:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixunstfdi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_u64:
@@ -1515,11 +1531,15 @@ define i64 @t_to_u64(fp128 %a) nounwind {
define i64 @t_to_s64(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_s64:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixtfdi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_s64:
@@ -1550,12 +1570,18 @@ define i64 @t_to_s64(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_s64:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixtfdi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_s64:
@@ -1588,12 +1614,18 @@ define i64 @t_to_s64(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_s64:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixtfdi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_s64:
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 874913629e9e3..8a287229a1cb1 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -118,30 +118,33 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X86-LABEL: scmp.8.128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: sbbl %eax, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: cmpl %ecx, 8(%ebp)
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %ebx
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: cmpl 8(%ebp), %esi
+; X86-NEXT: sbbl 12(%ebp), %eax
+; X86-NEXT: sbbl 16(%ebp), %edi
+; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: setl %al
; X86-NEXT: subb %cl, %al
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 4925f8bc6c8b0..392bc83d9d5d8 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -307,69 +307,70 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $80, %esp
+; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shldl $31, %eax, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: shldl $31, %edi, %esi
+; X86-NEXT: shldl $31, %ecx, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: sets %al
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: sets %cl
-; X86-NEXT: xorb %al, %cl
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 20(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: sets %al
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %bl
+; X86-NEXT: xorb %al, %bl
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
-; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: testb %bl, %al
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %edx
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index e7727a0ab6178..7df490f984928 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -370,67 +370,68 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $96, %esp
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %esi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %eax, %edi
-; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $31, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: shldl $31, %edi, %ebx
+; X86-NEXT: shldl $31, %esi, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 20(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %esi
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %edi
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl $0, %ebx
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl %eax
+; X86-NEXT: sets %al
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -438,41 +439,38 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
; X86-NEXT: sbbl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: sbbl $0, %ecx
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovgel %ecx, %edi
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: cmovgel %ecx, %ebx
+; X86-NEXT: cmovgel %ecx, %eax
; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovgel %ecx, %esi
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: sbbl %ebx, %edi
-; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: cmovgel %ecx, %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovgel %eax, %esi
+; X86-NEXT: cmovgel %eax, %edi
; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: cmovgel %eax, %edx
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -805,137 +803,155 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $208, %esp
-; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: subl $240, %esp
+; X86-NEXT: movl 12(%ebp), %esi
+; X86-NEXT: movl 20(%ebp), %edi
; X86-NEXT: movl 16(%ebp), %ebx
-; X86-NEXT: movl 32(%ebp), %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: leal (%ebx,%ebx), %eax
; X86-NEXT: shrl $31, %ebx
; X86-NEXT: shldl $31, %eax, %ebx
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: leal (%ecx,%ecx), %edx
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %edx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%edi,%edi), %eax
+; X86-NEXT: shrl $31, %edi
+; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl 32(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: sarl $31, %edi
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%esi,%esi), %eax
+; X86-NEXT: shrl $31, %esi
+; X86-NEXT: shldl $31, %eax, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 40(%ebp), %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: leal (%ecx,%ecx), %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%ecx,%ecx), %eax
; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %edx, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 40(%ebp)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl 28(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
@@ -949,18 +965,18 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: sets %bl
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: sets %bh
-; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
; X86-NEXT: orl %edi, %esi
-; X86-NEXT: setne %bl
-; X86-NEXT: testb %bh, %bl
+; X86-NEXT: setne %bh
+; X86-NEXT: testb %bl, %bh
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
@@ -1107,36 +1123,24 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: sets %ah
-; X86-NEXT: xorb %al, %ah
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 40(%ebp)
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -1144,38 +1148,38 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovgel %eax, %esi
-; X86-NEXT: cmovgel %eax, %ecx
; X86-NEXT: cmovgel %eax, %edi
+; X86-NEXT: cmovgel %eax, %ecx
+; X86-NEXT: cmovgel %eax, %esi
; X86-NEXT: movl $-1, %edx
; X86-NEXT: cmovgel %edx, %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl $-1, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl $0, %eax
; X86-NEXT: cmovgel %eax, %ebx
-; X86-NEXT: cmovgel %edx, %edi
-; X86-NEXT: shldl $31, %ebx, %edi
+; X86-NEXT: cmovgel %edx, %esi
+; X86-NEXT: shldl $31, %ebx, %esi
; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 76cb4e87bae18..dfeef48897e06 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -792,14 +792,24 @@ define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32
define void @combineShiftOfShiftedLogic(i128 %a1, i32 %a2, ptr %p) {
; X86-LABEL: combineShiftOfShiftedLogic:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: movl %eax, 20(%ecx)
; X86-NEXT: movl $0, 16(%ecx)
; X86-NEXT: movl $0, 12(%ecx)
; X86-NEXT: movl $0, 8(%ecx)
; X86-NEXT: movl $0, 4(%ecx)
; X86-NEXT: movl $0, (%ecx)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: combineShiftOfShiftedLogic:
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 767bd772ab7a3..9323cd5b1917f 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -212,9 +212,18 @@ entry:
}
define void @test_lshr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_lshr_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_lshr_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_lshr_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = lshr i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -222,9 +231,18 @@ entry:
}
define void @test_ashr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_ashr_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_ashr_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_ashr_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = ashr i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -232,9 +250,18 @@ entry:
}
define void @test_shl_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_shl_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_shl_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_shl_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = shl i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -874,26 +901,31 @@ define <2 x i256> @shl_zext_lshr_outofrange(<2 x i128> %a0) {
define i128 @lshr_shl_mask(i128 %a0) {
; i686-LABEL: lshr_shl_mask:
; i686: # %bb.0:
-; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %ebp
; i686-NEXT: .cfi_def_cfa_offset 8
+; i686-NEXT: .cfi_offset %ebp, -8
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: .cfi_def_cfa_register %ebp
+; i686-NEXT: pushl %edi
; i686-NEXT: pushl %esi
-; i686-NEXT: .cfi_def_cfa_offset 12
-; i686-NEXT: .cfi_offset %esi, -12
-; i686-NEXT: .cfi_offset %edi, -8
-; i686-NEXT: movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT: movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: .cfi_offset %esi, -16
+; i686-NEXT: .cfi_offset %edi, -12
+; i686-NEXT: movl 8(%ebp), %eax
+; i686-NEXT: movl 24(%ebp), %ecx
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
; i686-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF
-; i686-NEXT: andl {{[0-9]+}}(%esp), %edi
+; i686-NEXT: andl 36(%ebp), %edi
; i686-NEXT: movl %edi, 12(%eax)
; i686-NEXT: movl %esi, 8(%eax)
; i686-NEXT: movl %edx, 4(%eax)
; i686-NEXT: movl %ecx, (%eax)
+; i686-NEXT: leal -8(%ebp), %esp
; i686-NEXT: popl %esi
-; i686-NEXT: .cfi_def_cfa_offset 8
; i686-NEXT: popl %edi
-; i686-NEXT: .cfi_def_cfa_offset 4
+; i686-NEXT: popl %ebp
+; i686-NEXT: .cfi_def_cfa %esp, 4
; i686-NEXT: retl $4
;
; x86_64-LABEL: lshr_shl_mask:
diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll
index 86891e964d96d..509d4443e930a 100644
--- a/llvm/test/CodeGen/X86/smax.ll
+++ b/llvm/test/CodeGen/X86/smax.ll
@@ -151,31 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %ebx
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl 28(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovll %ebx, %edx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: cmovll 24(%ebp), %ebx
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: cmovll %esi, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -717,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: cmovll %esi, %ecx
-; X86-NEXT: cmovll %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll
index 8907f6c4cd598..5e9fe27b41d2c 100644
--- a/llvm/test/CodeGen/X86/smin.ll
+++ b/llvm/test/CodeGen/X86/smin.ll
@@ -151,32 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %edx, %ebx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: cmpl %ecx, 24(%ebp)
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovll %ebx, %edx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmovll %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: cmovll 24(%ebp), %ecx
+; X86-NEXT: cmovll 28(%ebp), %edx
+; X86-NEXT: cmovll 32(%ebp), %esi
+; X86-NEXT: cmovll %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -718,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: cmovll %esi, %ecx
-; X86-NEXT: cmovll %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll
index 6a52acfe2fb30..7f17299b39e33 100644
--- a/llvm/test/CodeGen/X86/ucmp.ll
+++ b/llvm/test/CodeGen/X86/ucmp.ll
@@ -107,29 +107,33 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; X86-LABEL: ucmp.8.128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: cmpl %eax, 24(%ebp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 16(%ebp), %ebx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: setb %al
-; X86-NEXT: cmpl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl 8(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: sbbb $0, %al
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 5b1e0545502b8..82dfeeee13293 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -153,26 +153,28 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrl %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %eax
-; X86-NEXT: movl %esp, %esi
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 30a7f80b2315d..3da5973f9f903 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -194,32 +194,34 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrl %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %eax
-; X86-NEXT: movl %esp, %esi
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $-1, %eax
; X86-NEXT: movl $-1, %edx
; X86-NEXT: jne .LBB4_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB4_2:
; X86-NEXT: leal -4(%ebp), %esp
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index f589d4a7b04a9..7ef859978cdbf 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -232,31 +232,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %ebx
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl 28(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovbl %ebx, %edx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: cmovbl 24(%ebp), %ebx
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: cmovbl 32(%ebp), %edx
+; X86-NEXT: cmovbl %esi, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -282,37 +285,40 @@ define i128 @test_i128_1(i128 %a) nounwind {
; X86-LABEL: test_i128_1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
; X86-NEXT: cmpl $1, %eax
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $1, %edi
-; X86-NEXT: cmovnel %eax, %edi
-; X86-NEXT: cmovel %ebx, %edi
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: negl %ebp
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovbl %eax, %ebp
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %ebx
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: cmovel %edi, %ebp
-; X86-NEXT: cmovel %edx, %ebx
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: cmpl $0, 28(%ebp)
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ecx, %ebx
+; X86-NEXT: movl $1, %ebx
+; X86-NEXT: cmovbl %eax, %ebx
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: cmovel %esi, %ebx
+; X86-NEXT: cmovel 28(%ebp), %edi
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1312,29 +1318,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index 7a5cdbb9ce758..c927abf3a4263 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -147,32 +147,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %edx, %ebx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: cmpl %ecx, 24(%ebp)
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovbl %ebx, %edx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmovbl %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: cmovbl 24(%ebp), %ecx
+; X86-NEXT: cmovbl 28(%ebp), %edx
+; X86-NEXT: cmovbl 32(%ebp), %esi
+; X86-NEXT: cmovbl %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -727,29 +729,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4c3170304b980..89afd1b00444b 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -38,8 +38,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: .cfi_def_cfa_offset 44
+; X86-NEXT: subl $28, %esp
+; X86-NEXT: .cfi_def_cfa_offset 48
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
@@ -147,7 +147,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index a15d633d85381..12dccca76eb19 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -92,6 +92,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -101,15 +103,15 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: jge .LBB4_2
+; CHECK-NEXT: jge .LBB4_3
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB4_2: # %bb2
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: jmp .LBB4_2
+; CHECK-NEXT: .LBB4_3: # %bb2
; CHECK-NEXT: movl $2, %eax
+; CHECK-NEXT: .LBB4_2: # %bb1
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
More information about the llvm-commits
mailing list