[llvm] 5ee1c0b - [windows] Always pass fp128 arguments indirectly (#128848)

via llvm-commits <llvm-commits@lists.llvm.org>
Thu Mar 6 04:14:15 PST 2025


Author: Trevor Gross
Date: 2025-03-06T20:14:11+08:00
New Revision: 5ee1c0b7148571ed9d60e447b66fb0f35de14576

URL: https://github.com/llvm/llvm-project/commit/5ee1c0b7148571ed9d60e447b66fb0f35de14576
DIFF: https://github.com/llvm/llvm-project/commit/5ee1c0b7148571ed9d60e447b66fb0f35de14576.diff

LOG: [windows] Always pass fp128 arguments indirectly (#128848)

LLVM currently expects `__float128` to be both passed and returned in
xmm registers on Windows. However, this disagrees with the Windows
x86-64 calling convention [1], which indicates values larger than 64
bits should be passed indirectly.

Update LLVM's default Windows calling convention to pass `fp128`
indirectly. Returning in xmm0 is unchanged, since this seems like a
reasonable extrapolation of the ABI. With this patch, the calling
convention for `i128` and `f128` is the same.

GCC already passes `__float128` indirectly, which this change matches.
However, GCC also returns it indirectly, which is not done here. Given the
consistency with `i128`, I intend to propose a GCC change to return in
`xmm0` as well, rather than making that change here.

This corresponds to the frontend change in [2], see more details there.

[1]:
https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170
[2]: https://github.com/llvm/llvm-project/pull/115052
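
As a rough sketch of the new lowering (illustrative IR only; the function
names below are hypothetical and not taken from the patch), both calls in the
following snippet now lower the same way on `x86_64-pc-windows-msvc`: the
128-bit argument is spilled to a 16-byte-aligned stack slot whose address is
passed in `rcx`, while the `fp128` result still comes back in `xmm0`.

    declare fp128 @use_f128(fp128)
    declare i128 @use_i128(i128)

    define fp128 @demo_f128(fp128 %x) {
      ; %x arrives as a pointer in rcx; the call below likewise passes the
      ; address of a 16-byte spill slot in rcx.
      %r = call fp128 @use_f128(fp128 %x)
      ret fp128 %r                        ; still returned in xmm0
    }

    define i128 @demo_i128(i128 %x) {
      ; i128 uses the same indirect argument passing as fp128 after this patch.
      %r = call i128 @use_i128(i128 %x)
      ret i128 %r
    }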

Added: 
    llvm/test/CodeGen/X86/fp128-abi.ll

Modified: 
    llvm/docs/ReleaseNotes.md
    llvm/lib/Target/X86/X86CallingConv.td
    llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
    llvm/test/CodeGen/X86/fp128-libcalls.ll
    llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
    llvm/test/CodeGen/X86/scalar-fp-to-i64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index fe3b508d5c5b0..f2a477d9bdcc9 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -122,6 +122,9 @@ Changes to the WebAssembly Backend
 Changes to the Windows Target
 -----------------------------
 
+* `fp128` is now passed indirectly, meaning it uses the same calling convention
+  as `i128`.
+
 Changes to the X86 Backend
 --------------------------
 

diff  --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index cf164acba9ec0..0d087e057a2bd 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -639,8 +639,9 @@ def CC_X86_Win64_C : CallingConv<[
   // 512 bit vectors are passed by pointer
   CCIfType<[v64i8, v32i16, v16i32, v32f16, v32bf16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
 
-  // Long doubles are passed by pointer
+  // Float types larger than 64-bits (long double and fp128) are passed by pointer
   CCIfType<[f80], CCPassIndirect<i64>>,
+  CCIfType<[f128], CCPassIndirect<i64>>,
 
   // If SSE was disabled, pass FP values smaller than 64-bits as integers in
   // GPRs or on the stack.

diff  --git a/llvm/test/CodeGen/X86/fp128-abi.ll b/llvm/test/CodeGen/X86/fp128-abi.ll
new file mode 100644
index 0000000000000..526ed7c72f73f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp128-abi.ll
@@ -0,0 +1,659 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC64
+; RUN: llc < %s -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MINGW
+
+define fp128 @return(ptr %p) {
+; CHECK-X64-LABEL: return:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    movaps (%rdi), %xmm0
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: return:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl (%ecx), %edx
+; CHECK-X86-NEXT:    movl 4(%ecx), %esi
+; CHECK-X86-NEXT:    movl 8(%ecx), %edi
+; CHECK-X86-NEXT:    movl 12(%ecx), %ecx
+; CHECK-X86-NEXT:    movl %ecx, 12(%eax)
+; CHECK-X86-NEXT:    movl %edi, 8(%eax)
+; CHECK-X86-NEXT:    movl %esi, 4(%eax)
+; CHECK-X86-NEXT:    movl %edx, (%eax)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl $4
+;
+; CHECK-MSVC64-LABEL: return:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: return:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl (%ecx), %edx
+; CHECK-MSVC32-NEXT:    movl 4(%ecx), %esi
+; CHECK-MSVC32-NEXT:    movl 8(%ecx), %edi
+; CHECK-MSVC32-NEXT:    movl 12(%ecx), %ecx
+; CHECK-MSVC32-NEXT:    movl %ecx, 12(%eax)
+; CHECK-MSVC32-NEXT:    movl %edi, 8(%eax)
+; CHECK-MSVC32-NEXT:    movl %esi, 4(%eax)
+; CHECK-MSVC32-NEXT:    movl %edx, (%eax)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: return:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    retq
+  %r = load fp128, ptr %p, align 16
+  ret fp128 %r
+}
+
+define fp128 @first_arg(fp128 %x) {
+; CHECK-X64-LABEL: first_arg:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: first_arg:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl %edi, 12(%eax)
+; CHECK-X86-NEXT:    movl %esi, 8(%eax)
+; CHECK-X86-NEXT:    movl %edx, 4(%eax)
+; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl $4
+;
+; CHECK-MSVC64-LABEL: first_arg:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: first_arg:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
+; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
+; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
+; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: first_arg:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    retq
+  ret fp128 %x
+}
+
+define fp128 @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, fp128 %x) {
+; CHECK-X64-LABEL: leading_args:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: leading_args:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl %edi, 12(%eax)
+; CHECK-X86-NEXT:    movl %esi, 8(%eax)
+; CHECK-X86-NEXT:    movl %edx, 4(%eax)
+; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl $4
+;
+; CHECK-MSVC64-LABEL: leading_args:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: leading_args:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
+; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
+; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
+; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: leading_args:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-NEXT:    retq
+  ret fp128 %x
+}
+
+define fp128 @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, fp128 %_5, fp128 %x) {
+; CHECK-X64-LABEL: many_leading_args:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    movaps %xmm1, %xmm0
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: many_leading_args:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl %edi, 12(%eax)
+; CHECK-X86-NEXT:    movl %esi, 8(%eax)
+; CHECK-X86-NEXT:    movl %edx, 4(%eax)
+; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl $4
+;
+; CHECK-MSVC64-LABEL: many_leading_args:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: many_leading_args:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
+; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
+; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
+; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: many_leading_args:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-NEXT:    retq
+  ret fp128 %x
+}
+
+define fp128 @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, fp128 %x, i64 %_5) {
+; CHECK-X64-LABEL: trailing_arg:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: trailing_arg:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT:    movl %edi, 12(%eax)
+; CHECK-X86-NEXT:    movl %esi, 8(%eax)
+; CHECK-X86-NEXT:    movl %edx, 4(%eax)
+; CHECK-X86-NEXT:    movl %ecx, (%eax)
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl $4
+;
+; CHECK-MSVC64-LABEL: trailing_arg:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: trailing_arg:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %edi
+; CHECK-MSVC32-NEXT:    pushl %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-MSVC32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT:    movl %edi, 12(%eax)
+; CHECK-MSVC32-NEXT:    movl %esi, 8(%eax)
+; CHECK-MSVC32-NEXT:    movl %edx, 4(%eax)
+; CHECK-MSVC32-NEXT:    movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT:    popl %esi
+; CHECK-MSVC32-NEXT:    popl %edi
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: trailing_arg:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-NEXT:    retq
+  ret fp128 %x
+}
+
+define void @call_first_arg(fp128 %x) nounwind {
+; CHECK-X64-LABEL: call_first_arg:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    pushq %rax
+; CHECK-X64-NEXT:    callq first_arg@PLT
+; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: call_first_arg:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    subl $40, %esp
+; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    calll first_arg@PLT
+; CHECK-X86-NEXT:    addl $56, %esp
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC64-LABEL: call_first_arg:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    subq $56, %rsp
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MSVC64-NEXT:    callq first_arg
+; CHECK-MSVC64-NEXT:    addq $56, %rsp
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: call_first_arg:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    subl $32, %esp
+; CHECK-MSVC32-NEXT:    movl %esp, %eax
+; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
+; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    calll _first_arg
+; CHECK-MSVC32-NEXT:    addl $20, %esp
+; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    popl %ebp
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: call_first_arg:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    subq $56, %rsp
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MINGW-NEXT:    callq first_arg
+; CHECK-MINGW-NEXT:    addq $56, %rsp
+; CHECK-MINGW-NEXT:    retq
+  call i128 @first_arg(fp128 %x)
+  ret void
+}
+
+define void @call_leading_args(fp128 %x) nounwind {
+; CHECK-X64-LABEL: call_leading_args:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    pushq %rax
+; CHECK-X64-NEXT:    xorl %edi, %edi
+; CHECK-X64-NEXT:    xorl %esi, %esi
+; CHECK-X64-NEXT:    xorl %edx, %edx
+; CHECK-X64-NEXT:    xorl %ecx, %ecx
+; CHECK-X64-NEXT:    callq leading_args@PLT
+; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: call_leading_args:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    subl $40, %esp
+; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    calll leading_args@PLT
+; CHECK-X86-NEXT:    addl $88, %esp
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC64-LABEL: call_leading_args:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    subq $72, %rsp
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-NEXT:    xorl %edx, %edx
+; CHECK-MSVC64-NEXT:    xorl %r8d, %r8d
+; CHECK-MSVC64-NEXT:    xorl %r9d, %r9d
+; CHECK-MSVC64-NEXT:    callq leading_args
+; CHECK-MSVC64-NEXT:    addq $72, %rsp
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: call_leading_args:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    subl $32, %esp
+; CHECK-MSVC32-NEXT:    movl %esp, %eax
+; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    calll _leading_args
+; CHECK-MSVC32-NEXT:    addl $52, %esp
+; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    popl %ebp
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: call_leading_args:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    subq $72, %rsp
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-NEXT:    xorl %edx, %edx
+; CHECK-MINGW-NEXT:    xorl %r8d, %r8d
+; CHECK-MINGW-NEXT:    xorl %r9d, %r9d
+; CHECK-MINGW-NEXT:    callq leading_args
+; CHECK-MINGW-NEXT:    addq $72, %rsp
+; CHECK-MINGW-NEXT:    retq
+  call i128 @leading_args(i64 0, i64 0, i64 0, i64 0, fp128 %x)
+  ret void
+}
+
+define void @call_many_leading_args(fp128 %x) nounwind {
+; CHECK-X64-LABEL: call_many_leading_args:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    pushq %rax
+; CHECK-X64-NEXT:    movaps %xmm0, %xmm1
+; CHECK-X64-NEXT:    xorps %xmm0, %xmm0
+; CHECK-X64-NEXT:    xorl %edi, %edi
+; CHECK-X64-NEXT:    xorl %esi, %esi
+; CHECK-X64-NEXT:    xorl %edx, %edx
+; CHECK-X64-NEXT:    xorl %ecx, %ecx
+; CHECK-X64-NEXT:    callq many_leading_args@PLT
+; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: call_many_leading_args:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    subl $40, %esp
+; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    calll many_leading_args@PLT
+; CHECK-X86-NEXT:    addl $104, %esp
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC64-LABEL: call_many_leading_args:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    subq $88, %rsp
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    xorps %xmm1, %xmm1
+; CHECK-MSVC64-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-NEXT:    xorl %edx, %edx
+; CHECK-MSVC64-NEXT:    xorl %r8d, %r8d
+; CHECK-MSVC64-NEXT:    xorl %r9d, %r9d
+; CHECK-MSVC64-NEXT:    callq many_leading_args
+; CHECK-MSVC64-NEXT:    addq $88, %rsp
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: call_many_leading_args:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    subl $32, %esp
+; CHECK-MSVC32-NEXT:    movl %esp, %eax
+; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    calll _many_leading_args
+; CHECK-MSVC32-NEXT:    addl $68, %esp
+; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    popl %ebp
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: call_many_leading_args:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    subq $88, %rsp
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    xorps %xmm1, %xmm1
+; CHECK-MINGW-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-NEXT:    xorl %edx, %edx
+; CHECK-MINGW-NEXT:    xorl %r8d, %r8d
+; CHECK-MINGW-NEXT:    xorl %r9d, %r9d
+; CHECK-MINGW-NEXT:    callq many_leading_args
+; CHECK-MINGW-NEXT:    addq $88, %rsp
+; CHECK-MINGW-NEXT:    retq
+  call i128 @many_leading_args(i64 0, i64 0, i64 0, i64 0, fp128 0xL0, fp128 %x)
+  ret void
+}
+
+define void @call_trailing_arg(fp128 %x) nounwind {
+; CHECK-X64-LABEL: call_trailing_arg:
+; CHECK-X64:       # %bb.0:
+; CHECK-X64-NEXT:    pushq %rax
+; CHECK-X64-NEXT:    xorl %edi, %edi
+; CHECK-X64-NEXT:    xorl %esi, %esi
+; CHECK-X64-NEXT:    xorl %edx, %edx
+; CHECK-X64-NEXT:    xorl %ecx, %ecx
+; CHECK-X64-NEXT:    callq trailing_arg@PLT
+; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: call_trailing_arg:
+; CHECK-X86:       # %bb.0:
+; CHECK-X86-NEXT:    subl $40, %esp
+; CHECK-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl $0
+; CHECK-X86-NEXT:    pushl %eax
+; CHECK-X86-NEXT:    calll trailing_arg@PLT
+; CHECK-X86-NEXT:    addl $88, %esp
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-MSVC64-LABEL: call_trailing_arg:
+; CHECK-MSVC64:       # %bb.0:
+; CHECK-MSVC64-NEXT:    subq $72, %rsp
+; CHECK-MSVC64-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-NEXT:    xorl %edx, %edx
+; CHECK-MSVC64-NEXT:    xorl %r8d, %r8d
+; CHECK-MSVC64-NEXT:    xorl %r9d, %r9d
+; CHECK-MSVC64-NEXT:    callq trailing_arg
+; CHECK-MSVC64-NEXT:    addq $72, %rsp
+; CHECK-MSVC64-NEXT:    retq
+;
+; CHECK-MSVC32-LABEL: call_trailing_arg:
+; CHECK-MSVC32:       # %bb.0:
+; CHECK-MSVC32-NEXT:    pushl %ebp
+; CHECK-MSVC32-NEXT:    movl %esp, %ebp
+; CHECK-MSVC32-NEXT:    andl $-16, %esp
+; CHECK-MSVC32-NEXT:    subl $32, %esp
+; CHECK-MSVC32-NEXT:    movl %esp, %eax
+; CHECK-MSVC32-NEXT:    pushl 20(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 16(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 12(%ebp)
+; CHECK-MSVC32-NEXT:    pushl 8(%ebp)
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl $0
+; CHECK-MSVC32-NEXT:    pushl %eax
+; CHECK-MSVC32-NEXT:    calll _trailing_arg
+; CHECK-MSVC32-NEXT:    addl $52, %esp
+; CHECK-MSVC32-NEXT:    movl %ebp, %esp
+; CHECK-MSVC32-NEXT:    popl %ebp
+; CHECK-MSVC32-NEXT:    retl
+;
+; CHECK-MINGW-LABEL: call_trailing_arg:
+; CHECK-MINGW:       # %bb.0:
+; CHECK-MINGW-NEXT:    subq $72, %rsp
+; CHECK-MINGW-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-NEXT:    xorl %edx, %edx
+; CHECK-MINGW-NEXT:    xorl %r8d, %r8d
+; CHECK-MINGW-NEXT:    xorl %r9d, %r9d
+; CHECK-MINGW-NEXT:    callq trailing_arg
+; CHECK-MINGW-NEXT:    addq $72, %rsp
+; CHECK-MINGW-NEXT:    retq
+  call i128 @trailing_arg(i64 0, i64 0, i64 0, i64 0, fp128 %x)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index 5263e0d4f6f39..a85b53ea62ac7 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -8,6 +8,12 @@
 ; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+sse2 \
 ; RUN:     -enable-legalize-types-checking \
 ; RUN:     | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -O2 -mtriple=x86_64-pc-windows-msvc \
+; RUN:     -enable-legalize-types-checking \
+; RUN:     | FileCheck %s --check-prefix=WIN
+; RUN: llc < %s -O2 -mtriple=i686-pc-windows-msvc \
+; RUN:     -enable-legalize-types-checking \
+; RUN:     | FileCheck %s --check-prefix=WIN-X86
 
 ; Check all soft floating point library function calls.
 
@@ -57,6 +63,55 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: add:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __addtf3
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: add:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___addtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %add = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y,  metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %add
@@ -108,6 +163,55 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: sub:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __subtf3
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: sub:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___subtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sub = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y,  metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %sub
@@ -159,6 +263,55 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: mul:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __multf3
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: mul:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___multf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %mul = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y,  metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %mul
@@ -210,6 +363,55 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: div:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __divtf3
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: div:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___divtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %div = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y,  metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %div
@@ -258,6 +460,62 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: fma:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $88, %rsp
+; WIN-NEXT:    movaps (%r8), %xmm0
+; WIN-NEXT:    movaps (%rcx), %xmm1
+; WIN-NEXT:    movaps (%rdx), %xmm2
+; WIN-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
+; WIN-NEXT:    callq fmal
+; WIN-NEXT:    addq $88, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: fma:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 56(%ebp)
+; WIN-X86-NEXT:    pushl 52(%ebp)
+; WIN-X86-NEXT:    pushl 48(%ebp)
+; WIN-X86-NEXT:    pushl 44(%ebp)
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    addl $52, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %fma = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y,  fp128 %z, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %fma
@@ -302,6 +560,55 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: frem:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: frem:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %div = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y,  metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %div
@@ -342,6 +649,48 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: ceil:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq ceill
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: ceil:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %ceil = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %ceil
@@ -382,6 +731,48 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: acos:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq acosl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: acos:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %acos = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %acos
@@ -422,6 +813,48 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: cos:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq cosl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cos:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %cos = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %cos
@@ -462,6 +895,48 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: cosh:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq coshl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cosh:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %cosh = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %cosh
@@ -502,6 +977,48 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: exp:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq expl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: exp:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _expl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %exp = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %exp
@@ -542,6 +1059,48 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: exp2:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq exp2l
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: exp2:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _exp2l
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %exp2 = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %exp2
@@ -582,6 +1141,48 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: floor:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq floorl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: floor:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %floor = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %floor
@@ -622,6 +1223,48 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: log:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq logl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: log:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _logl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %log = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %log
@@ -662,6 +1305,48 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: log10:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq log10l
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: log10:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _log10l
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %log10 = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %log10
@@ -702,6 +1387,48 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: log2:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq log2l
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: log2:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _log2l
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %log2 = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %log2
@@ -746,6 +1473,55 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: maxnum:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq fmaxl
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: maxnum:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmaxl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %maxnum = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
   ret fp128 %maxnum
@@ -790,6 +1566,55 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: minnum:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq fminl
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: minnum:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fminl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %minnum = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
   ret fp128 %minnum
@@ -830,6 +1655,48 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: nearbyint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: nearbyint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %nearbyint = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %nearbyint
@@ -874,6 +1741,55 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: pow:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq powl
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: pow:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _powl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %pow = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %pow
@@ -922,6 +1838,49 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: powi:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq __powitf2
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: powi:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___powitf2
+; WIN-X86-NEXT:    addl $24, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %powi = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %powi
@@ -962,6 +1921,48 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: rint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq rintl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: rint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %rint = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %rint
@@ -1002,6 +2003,48 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: round:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq roundl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: round:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %round = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %round
@@ -1042,6 +2085,48 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: roundeven:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq roundevenl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: roundeven:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _roundevenl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %roundeven = call fp128 @llvm.experimental.constrained.roundeven.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %roundeven
@@ -1082,6 +2167,48 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: asin:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq asinl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: asin:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %asin = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %asin
@@ -1122,6 +2249,48 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: sin:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sinl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: sin:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sin = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %sin
@@ -1162,6 +2331,48 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: sinh:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: sinh:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sinh = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %sinh
@@ -1202,6 +2413,48 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: sqrt:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: sqrt:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %sqrt
@@ -1242,6 +2495,48 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: atan:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq atanl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: atan:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %atan = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %atan
@@ -1286,6 +2581,55 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: atan2:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: atan2:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %atan2 = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %atan2
@@ -1326,6 +2670,48 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: tan:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq tanl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: tan:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %tan = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %tan
@@ -1366,6 +2752,48 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: tanh:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: tanh:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %tanh = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret fp128 %tanh
@@ -1406,6 +2834,48 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: trunc:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq truncl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: trunc:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 8(%esi)
+; WIN-X86-NEXT:    movl %edx, 12(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %trunc = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %trunc
@@ -1436,6 +2906,26 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    calll lrintl
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: lrint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq lrintl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: lrint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll _lrintl
+; WIN-X86-NEXT:    addl $16, %esp
+; WIN-X86-NEXT:    retl
 entry:
   %rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret i32 %rint
@@ -1466,6 +2956,26 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    calll llrintl
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: llrint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq llrintl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: llrint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll _llrintl
+; WIN-X86-NEXT:    addl $16, %esp
+; WIN-X86-NEXT:    retl
 entry:
   %rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret i64 %rint
@@ -1496,6 +3006,26 @@ define i32 @lround(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    calll lroundl
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: lround:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq lroundl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: lround:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll _lroundl
+; WIN-X86-NEXT:    addl $16, %esp
+; WIN-X86-NEXT:    retl
 entry:
   %round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i32 %round
@@ -1526,6 +3056,26 @@ define i64 @llround(fp128 %x) nounwind strictfp {
 ; X86-NEXT:    calll llroundl
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: llround:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq llroundl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: llround:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll _llroundl
+; WIN-X86-NEXT:    addl $16, %esp
+; WIN-X86-NEXT:    retl
 entry:
   %round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i64 %round
@@ -1601,6 +3151,52 @@ define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    movl 4(%ecx), %edx
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: cmp:
+; WIN:       # %bb.0:
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    pushq %rdi
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movq %rdx, %rsi
+; WIN-NEXT:    movq %rcx, %rdi
+; WIN-NEXT:    movaps (%r8), %xmm0
+; WIN-NEXT:    movaps (%r9), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __eqtf2
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    cmovneq %rsi, %rdi
+; WIN-NEXT:    movq %rdi, %rax
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    popq %rdi
+; WIN-NEXT:    popq %rsi
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cmp:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___eqtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    je LBB37_1
+; WIN-X86-NEXT:  # %bb.2:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    jmp LBB37_3
+; WIN-X86-NEXT:  LBB37_1:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:  LBB37_3:
+; WIN-X86-NEXT:    movl (%ecx), %eax
+; WIN-X86-NEXT:    movl 4(%ecx), %edx
+; WIN-X86-NEXT:    retl
   %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
                                                fp128 %x, fp128 %y,
                                                metadata !"oeq",
@@ -1679,6 +3275,52 @@ define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    movl 4(%ecx), %edx
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: cmps:
+; WIN:       # %bb.0:
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    pushq %rdi
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movq %rdx, %rsi
+; WIN-NEXT:    movq %rcx, %rdi
+; WIN-NEXT:    movaps (%r8), %xmm0
+; WIN-NEXT:    movaps (%r9), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __eqtf2
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    cmovneq %rsi, %rdi
+; WIN-NEXT:    movq %rdi, %rax
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    popq %rdi
+; WIN-NEXT:    popq %rsi
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cmps:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___eqtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    je LBB38_1
+; WIN-X86-NEXT:  # %bb.2:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    jmp LBB38_3
+; WIN-X86-NEXT:  LBB38_1:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:  LBB38_3:
+; WIN-X86-NEXT:    movl (%ecx), %eax
+; WIN-X86-NEXT:    movl 4(%ecx), %edx
+; WIN-X86-NEXT:    retl
   %cond = call i1 @llvm.experimental.constrained.fcmps.f128(
                                                fp128 %x, fp128 %y,
                                                metadata !"oeq",
@@ -1815,6 +3457,92 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: cmp_ueq_q:
+; WIN:       # %bb.0:
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    pushq %rdi
+; WIN-NEXT:    pushq %rbx
+; WIN-NEXT:    subq $128, %rsp
+; WIN-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN-NEXT:    movq %rdx, %rsi
+; WIN-NEXT:    movq %rcx, %rdi
+; WIN-NEXT:    movaps (%r8), %xmm6
+; WIN-NEXT:    movaps (%r9), %xmm7
+; WIN-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __eqtf2
+; WIN-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    sete %bl
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __unordtf2
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    setne %al
+; WIN-NEXT:    orb %bl, %al
+; WIN-NEXT:    cmoveq %rsi, %rdi
+; WIN-NEXT:    movq %rdi, %rax
+; WIN-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN-NEXT:    addq $128, %rsp
+; WIN-NEXT:    popq %rbx
+; WIN-NEXT:    popq %rdi
+; WIN-NEXT:    popq %rsi
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cmp_ueq_q:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl %ebx
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___eqtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    sete %bl
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___unordtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    setne %al
+; WIN-X86-NEXT:    orb %bl, %al
+; WIN-X86-NEXT:    jne LBB39_1
+; WIN-X86-NEXT:  # %bb.2:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    jmp LBB39_3
+; WIN-X86-NEXT:  LBB39_1:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:  LBB39_3:
+; WIN-X86-NEXT:    movl (%ecx), %eax
+; WIN-X86-NEXT:    movl 4(%ecx), %edx
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebx
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
                                                fp128 %x, fp128 %y,
                                                metadata !"ueq",
@@ -1951,6 +3679,92 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: cmp_one_q:
+; WIN:       # %bb.0:
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    pushq %rdi
+; WIN-NEXT:    pushq %rbx
+; WIN-NEXT:    subq $128, %rsp
+; WIN-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN-NEXT:    movq %rdx, %rsi
+; WIN-NEXT:    movq %rcx, %rdi
+; WIN-NEXT:    movaps (%r8), %xmm6
+; WIN-NEXT:    movaps (%r9), %xmm7
+; WIN-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __eqtf2
+; WIN-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    setne %bl
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __unordtf2
+; WIN-NEXT:    testl %eax, %eax
+; WIN-NEXT:    sete %al
+; WIN-NEXT:    testb %bl, %al
+; WIN-NEXT:    cmoveq %rsi, %rdi
+; WIN-NEXT:    movq %rdi, %rax
+; WIN-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN-NEXT:    addq $128, %rsp
+; WIN-NEXT:    popq %rbx
+; WIN-NEXT:    popq %rdi
+; WIN-NEXT:    popq %rsi
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: cmp_one_q:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl %ebx
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___eqtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    setne %bl
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT:    calll ___unordtf2
+; WIN-X86-NEXT:    addl $32, %esp
+; WIN-X86-NEXT:    testl %eax, %eax
+; WIN-X86-NEXT:    sete %al
+; WIN-X86-NEXT:    testb %bl, %al
+; WIN-X86-NEXT:    jne LBB40_1
+; WIN-X86-NEXT:  # %bb.2:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    jmp LBB40_3
+; WIN-X86-NEXT:  LBB40_1:
+; WIN-X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:  LBB40_3:
+; WIN-X86-NEXT:    movl (%ecx), %eax
+; WIN-X86-NEXT:    movl 4(%ecx), %edx
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebx
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
                                                fp128 %x, fp128 %y,
                                                metadata !"one",

diff  --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index 0831675095d74..05c02a2139b5a 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -5,6 +5,10 @@
 ; RUN:     -enable-legalize-types-checking | FileCheck %s --check-prefix=GNU
 ; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=sse2 \
 ; RUN:     -enable-legalize-types-checking | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -O2 -mtriple=x86_64-pc-windows-msvc \
+; RUN:     -enable-legalize-types-checking | FileCheck %s --check-prefix=WIN
+; RUN: llc < %s -O2 -mtriple=i686-pc-windows-msvc \
+; RUN:     -enable-legalize-types-checking | FileCheck %s --check-prefix=WIN-X86
 
 ; Check all soft floating point library function calls.
 
@@ -55,6 +59,52 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Add:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __addtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Add:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___addtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %add = fadd fp128 %d1, %d2
   store fp128 %add, ptr @vf128, align 16
@@ -111,6 +161,52 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128_1Add:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps vf128(%rip), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __addtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128_1Add:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl _vf128+12
+; WIN-X86-NEXT:    pushl _vf128+8
+; WIN-X86-NEXT:    pushl _vf128+4
+; WIN-X86-NEXT:    pushl _vf128
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___addtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+8
+; WIN-X86-NEXT:    movl %edx, _vf128+12
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %0 = load fp128, ptr @vf128, align 16
   %add = fadd fp128 %0, %d1
@@ -162,6 +258,52 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Sub:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __subtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Sub:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___subtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sub = fsub fp128 %d1, %d2
   store fp128 %sub, ptr @vf128, align 16
@@ -218,6 +360,52 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128_1Sub:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps vf128(%rip), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __subtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128_1Sub:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl _vf128+12
+; WIN-X86-NEXT:    pushl _vf128+8
+; WIN-X86-NEXT:    pushl _vf128+4
+; WIN-X86-NEXT:    pushl _vf128
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___subtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+8
+; WIN-X86-NEXT:    movl %edx, _vf128+12
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %0 = load fp128, ptr @vf128, align 16
   %sub = fsub fp128 %0, %d1
@@ -269,6 +457,52 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Mul:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __multf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Mul:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___multf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %mul = fmul fp128 %d1, %d2
   store fp128 %mul, ptr @vf128, align 16
@@ -325,6 +559,52 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128_1Mul:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps vf128(%rip), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __multf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128_1Mul:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl _vf128+12
+; WIN-X86-NEXT:    pushl _vf128+8
+; WIN-X86-NEXT:    pushl _vf128+4
+; WIN-X86-NEXT:    pushl _vf128
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___multf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+8
+; WIN-X86-NEXT:    movl %edx, _vf128+12
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %0 = load fp128, ptr @vf128, align 16
   %mul = fmul fp128 %0, %d1
@@ -376,6 +656,52 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Div:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __divtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Div:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___divtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %div = fdiv fp128 %d1, %d2
   store fp128 %div, ptr @vf128, align 16
@@ -432,6 +758,52 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128_1Div:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps vf128(%rip), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq __divtf3
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128_1Div:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl _vf128+12
+; WIN-X86-NEXT:    pushl _vf128+8
+; WIN-X86-NEXT:    pushl _vf128+4
+; WIN-X86-NEXT:    pushl _vf128
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll ___divtf3
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+8
+; WIN-X86-NEXT:    movl %edx, _vf128+12
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %0 = load fp128, ptr @vf128, align 16
   %div = fdiv fp128 %0, %d1
@@ -475,6 +847,52 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Rem:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Rem:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %div = frem fp128 %d1, %d2
   store fp128 %div, ptr @vf128, align 16
@@ -521,6 +939,52 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128_1Rem:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps vf128(%rip), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128_1Rem:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl _vf128+12
+; WIN-X86-NEXT:    pushl _vf128+8
+; WIN-X86-NEXT:    pushl _vf128+4
+; WIN-X86-NEXT:    pushl _vf128
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+8
+; WIN-X86-NEXT:    movl %edx, _vf128+12
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %0 = load fp128, ptr @vf128, align 16
   %div = frem fp128 %0, %d1
@@ -560,6 +1024,45 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Sqrt:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Sqrt:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.sqrt.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -599,6 +1102,45 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Sin:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sinl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Sin:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.sin.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -638,6 +1180,45 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Cos:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq cosl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Cos:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.cos.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -677,6 +1258,45 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Ceil:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq ceill
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Ceil:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.ceil.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -716,6 +1336,45 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Floor:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq floorl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Floor:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.floor.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -755,6 +1414,45 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Trunc:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq truncl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Trunc:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.trunc.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -794,6 +1492,45 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Nearbyint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Nearbyint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.nearbyint.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -833,6 +1570,45 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Rint:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq rintl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Rint:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.rint.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -872,6 +1648,45 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ; X86-NEXT:    movaps %xmm0, vf128
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    retl
+;
+; WIN-LABEL: Test128Round:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq roundl
+; WIN-NEXT:    movaps %xmm0, vf128(%rip)
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Round:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $32, %esp
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl 8(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; WIN-X86-NEXT:    movl %esi, _vf128+12
+; WIN-X86-NEXT:    movl %edx, _vf128+8
+; WIN-X86-NEXT:    movl %ecx, _vf128+4
+; WIN-X86-NEXT:    movl %eax, _vf128
+; WIN-X86-NEXT:    leal -4(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %sqrt = call fp128 @llvm.round.f128(fp128 %d1)
   store fp128 %sqrt, ptr @vf128, align 16
@@ -916,6 +1731,62 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128FMA:
+; WIN:       # %bb.0: # %entry
+; WIN-NEXT:    subq $88, %rsp
+; WIN-NEXT:    movaps (%r8), %xmm0
+; WIN-NEXT:    movaps (%rcx), %xmm1
+; WIN-NEXT:    movaps (%rdx), %xmm2
+; WIN-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
+; WIN-NEXT:    callq fmal
+; WIN-NEXT:    addq $88, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128FMA:
+; WIN-X86:       # %bb.0: # %entry
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 56(%ebp)
+; WIN-X86-NEXT:    pushl 52(%ebp)
+; WIN-X86-NEXT:    pushl 48(%ebp)
+; WIN-X86-NEXT:    pushl 44(%ebp)
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    addl $52, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
 entry:
   %call = call fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
   ret fp128 %call
@@ -951,6 +1822,48 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Acos:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq acosl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Acos:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.acos.f128(fp128 %a)
   ret fp128 %x
 }
@@ -984,6 +1897,48 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Asin:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq asinl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Asin:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.asin.f128(fp128 %a)
   ret fp128 %x
 }
@@ -1017,6 +1972,48 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Atan:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq atanl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Atan:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.atan.f128(fp128 %a)
   ret fp128 %x
 }
@@ -1054,6 +2051,55 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Atan2:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Atan2:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 40(%ebp)
+; WIN-X86-NEXT:    pushl 36(%ebp)
+; WIN-X86-NEXT:    pushl 32(%ebp)
+; WIN-X86-NEXT:    pushl 28(%ebp)
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    addl $36, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b)
   ret fp128 %x
 }
@@ -1087,6 +2133,48 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Cosh:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq coshl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Cosh:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.cosh.f128(fp128 %a)
   ret fp128 %x
 }
@@ -1120,6 +2208,48 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Sinh:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Sinh:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.sinh.f128(fp128 %a)
   ret fp128 %x
 }
@@ -1153,6 +2283,48 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Tan:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq tanl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Tan:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.tan.f128(fp128 %a)
   ret fp128 %x
 }
@@ -1186,6 +2358,48 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
+;
+; WIN-LABEL: Test128Tanh:
+; WIN:       # %bb.0:
+; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    retq
+;
+; WIN-X86-LABEL: Test128Tanh:
+; WIN-X86:       # %bb.0:
+; WIN-X86-NEXT:    pushl %ebp
+; WIN-X86-NEXT:    movl %esp, %ebp
+; WIN-X86-NEXT:    pushl %edi
+; WIN-X86-NEXT:    pushl %esi
+; WIN-X86-NEXT:    andl $-16, %esp
+; WIN-X86-NEXT:    subl $16, %esp
+; WIN-X86-NEXT:    movl 8(%ebp), %esi
+; WIN-X86-NEXT:    movl %esp, %eax
+; WIN-X86-NEXT:    pushl 24(%ebp)
+; WIN-X86-NEXT:    pushl 20(%ebp)
+; WIN-X86-NEXT:    pushl 16(%ebp)
+; WIN-X86-NEXT:    pushl 12(%ebp)
+; WIN-X86-NEXT:    pushl %eax
+; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    addl $20, %esp
+; WIN-X86-NEXT:    movl (%esp), %eax
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; WIN-X86-NEXT:    movl %edi, 12(%esi)
+; WIN-X86-NEXT:    movl %edx, 8(%esi)
+; WIN-X86-NEXT:    movl %ecx, 4(%esi)
+; WIN-X86-NEXT:    movl %eax, (%esi)
+; WIN-X86-NEXT:    movl %esi, %eax
+; WIN-X86-NEXT:    leal -8(%ebp), %esp
+; WIN-X86-NEXT:    popl %esi
+; WIN-X86-NEXT:    popl %edi
+; WIN-X86-NEXT:    popl %ebp
+; WIN-X86-NEXT:    retl
   %x = call fp128 @llvm.tanh.f128(fp128 %a)
   ret fp128 %x
 }

diff  --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
index b22533a8c8ee2..50a967e1c2a1a 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -780,9 +780,12 @@ define i32 @t_to_u32(fp128 %a) nounwind {
 ;
 ; X64-AVX512-WIN-LABEL: t_to_u32:
 ; X64-AVX512-WIN:       # %bb.0:
-; X64-AVX512-WIN-NEXT:    subq $40, %rsp
+; X64-AVX512-WIN-NEXT:    subq $56, %rsp
+; X64-AVX512-WIN-NEXT:    vmovaps (%rcx), %xmm0
+; X64-AVX512-WIN-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-AVX512-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-AVX512-WIN-NEXT:    callq __fixunstfsi
-; X64-AVX512-WIN-NEXT:    addq $40, %rsp
+; X64-AVX512-WIN-NEXT:    addq $56, %rsp
 ; X64-AVX512-WIN-NEXT:    retq
 ;
 ; X64-AVX512-LIN-LABEL: t_to_u32:
@@ -815,9 +818,12 @@ define i32 @t_to_u32(fp128 %a) nounwind {
 ;
 ; X64-SSE-WIN-LABEL: t_to_u32:
 ; X64-SSE-WIN:       # %bb.0:
-; X64-SSE-WIN-NEXT:    subq $40, %rsp
+; X64-SSE-WIN-NEXT:    subq $56, %rsp
+; X64-SSE-WIN-NEXT:    movaps (%rcx), %xmm0
+; X64-SSE-WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-SSE-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-SSE-WIN-NEXT:    callq __fixunstfsi
-; X64-SSE-WIN-NEXT:    addq $40, %rsp
+; X64-SSE-WIN-NEXT:    addq $56, %rsp
 ; X64-SSE-WIN-NEXT:    retq
 ;
 ; X64-SSE-LIN-LABEL: t_to_u32:
@@ -872,9 +878,12 @@ define i32 @t_to_s32(fp128 %a) nounwind {
 ;
 ; X64-AVX512-WIN-LABEL: t_to_s32:
 ; X64-AVX512-WIN:       # %bb.0:
-; X64-AVX512-WIN-NEXT:    subq $40, %rsp
+; X64-AVX512-WIN-NEXT:    subq $56, %rsp
+; X64-AVX512-WIN-NEXT:    vmovaps (%rcx), %xmm0
+; X64-AVX512-WIN-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-AVX512-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-AVX512-WIN-NEXT:    callq __fixtfsi
-; X64-AVX512-WIN-NEXT:    addq $40, %rsp
+; X64-AVX512-WIN-NEXT:    addq $56, %rsp
 ; X64-AVX512-WIN-NEXT:    retq
 ;
 ; X64-AVX512-LIN-LABEL: t_to_s32:
@@ -907,9 +916,12 @@ define i32 @t_to_s32(fp128 %a) nounwind {
 ;
 ; X64-SSE-WIN-LABEL: t_to_s32:
 ; X64-SSE-WIN:       # %bb.0:
-; X64-SSE-WIN-NEXT:    subq $40, %rsp
+; X64-SSE-WIN-NEXT:    subq $56, %rsp
+; X64-SSE-WIN-NEXT:    movaps (%rcx), %xmm0
+; X64-SSE-WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-SSE-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-SSE-WIN-NEXT:    callq __fixtfsi
-; X64-SSE-WIN-NEXT:    addq $40, %rsp
+; X64-SSE-WIN-NEXT:    addq $56, %rsp
 ; X64-SSE-WIN-NEXT:    retq
 ;
 ; X64-SSE-LIN-LABEL: t_to_s32:

diff  --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index 86d4be9cb7af6..f516db8b30ffe 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1435,9 +1435,12 @@ define i64 @t_to_u64(fp128 %a) nounwind {
 ;
 ; X64-AVX512-WIN-LABEL: t_to_u64:
 ; X64-AVX512-WIN:       # %bb.0:
-; X64-AVX512-WIN-NEXT:    subq $40, %rsp
+; X64-AVX512-WIN-NEXT:    subq $56, %rsp
+; X64-AVX512-WIN-NEXT:    vmovaps (%rcx), %xmm0
+; X64-AVX512-WIN-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-AVX512-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-AVX512-WIN-NEXT:    callq __fixunstfdi
-; X64-AVX512-WIN-NEXT:    addq $40, %rsp
+; X64-AVX512-WIN-NEXT:    addq $56, %rsp
 ; X64-AVX512-WIN-NEXT:    retq
 ;
 ; X64-AVX512-LIN-LABEL: t_to_u64:
@@ -1470,9 +1473,12 @@ define i64 @t_to_u64(fp128 %a) nounwind {
 ;
 ; X64-SSE-WIN-LABEL: t_to_u64:
 ; X64-SSE-WIN:       # %bb.0:
-; X64-SSE-WIN-NEXT:    subq $40, %rsp
+; X64-SSE-WIN-NEXT:    subq $56, %rsp
+; X64-SSE-WIN-NEXT:    movaps (%rcx), %xmm0
+; X64-SSE-WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-SSE-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-SSE-WIN-NEXT:    callq __fixunstfdi
-; X64-SSE-WIN-NEXT:    addq $40, %rsp
+; X64-SSE-WIN-NEXT:    addq $56, %rsp
 ; X64-SSE-WIN-NEXT:    retq
 ;
 ; X64-SSE-LIN-LABEL: t_to_u64:
@@ -1527,9 +1533,12 @@ define i64 @t_to_s64(fp128 %a) nounwind {
 ;
 ; X64-AVX512-WIN-LABEL: t_to_s64:
 ; X64-AVX512-WIN:       # %bb.0:
-; X64-AVX512-WIN-NEXT:    subq $40, %rsp
+; X64-AVX512-WIN-NEXT:    subq $56, %rsp
+; X64-AVX512-WIN-NEXT:    vmovaps (%rcx), %xmm0
+; X64-AVX512-WIN-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-AVX512-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-AVX512-WIN-NEXT:    callq __fixtfdi
-; X64-AVX512-WIN-NEXT:    addq $40, %rsp
+; X64-AVX512-WIN-NEXT:    addq $56, %rsp
 ; X64-AVX512-WIN-NEXT:    retq
 ;
 ; X64-AVX512-LIN-LABEL: t_to_s64:
@@ -1562,9 +1571,12 @@ define i64 @t_to_s64(fp128 %a) nounwind {
 ;
 ; X64-SSE-WIN-LABEL: t_to_s64:
 ; X64-SSE-WIN:       # %bb.0:
-; X64-SSE-WIN-NEXT:    subq $40, %rsp
+; X64-SSE-WIN-NEXT:    subq $56, %rsp
+; X64-SSE-WIN-NEXT:    movaps (%rcx), %xmm0
+; X64-SSE-WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-SSE-WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; X64-SSE-WIN-NEXT:    callq __fixtfdi
-; X64-SSE-WIN-NEXT:    addq $40, %rsp
+; X64-SSE-WIN-NEXT:    addq $56, %rsp
 ; X64-SSE-WIN-NEXT:    retq
 ;
 ; X64-SSE-LIN-LABEL: t_to_s64:


        

