[llvm-branch-commits] [llvm] 08e5e91 - [X86] Remove [ER]SP from all CSR lists

Reid Kleckner via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 6 09:55:04 PST 2021


Author: Reid Kleckner
Date: 2021-01-06T09:50:46-08:00
New Revision: 08e5e91e45af96e564f3c19be7ed4cb00bac3034

URL: https://github.com/llvm/llvm-project/commit/08e5e91e45af96e564f3c19be7ed4cb00bac3034
DIFF: https://github.com/llvm/llvm-project/commit/08e5e91e45af96e564f3c19be7ed4cb00bac3034.diff

LOG: [X86] Remove [ER]SP from all CSR lists

The CSR lists control which registers are spilled and reloaded in the
prologue and epilogue. The stack pointer is managed explicitly, and
should never be pushed or popped. Remove it from these lists. This
affected the regcall and preserve_all / preserve_most calling conventions.

Differential Revision: https://reviews.llvm.org/D94118
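
For context, a minimal reproducer sketched from the regcall tests updated below
(hypothetical file name and llc invocation, not part of this commit): before this
change the x86-64 SysV output for @caller began with "pushq %rsp" and ended with
"popq %rsp", because RSP was a member of CSR_SysV64_RegCall_NoSSE; afterwards the
stack pointer is no longer treated as callee-saved, and the updated checks only
push/pop a scratch register to keep the stack aligned.

  ; reproducer.ll (illustrative); e.g. llc -mtriple=x86_64-unknown-linux-gnu reproducer.ll
  target triple = "x86_64-unknown-linux-gnu"

  ; A regcall callee that itself makes a regcall call, so it needs a prologue/epilogue.
  declare x86_regcallcc i32 @callee(i32 %a)

  define x86_regcallcc i32 @caller(i32 %a) {
  entry:
    %r = call x86_regcallcc i32 @callee(i32 %a)
    %s = add i32 %r, 1
    ret i32 %s
  }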

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86CallingConv.td
    llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
    llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
    llvm/test/CodeGen/X86/dynamic-regmask.ll
    llvm/test/CodeGen/X86/ipra-reg-usage.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 9e414ceeb781..3735fab818ce 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1102,7 +1102,7 @@ def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)
 
 // All GPRs - except r11
 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
-                                              R8, R9, R10, RSP)>;
+                                              R8, R9, R10)>;
 
 // All registers - except r11
 def CSR_64_RT_AllRegs     : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
@@ -1160,17 +1160,16 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
 def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
 
 // Register calling convention preserves few GPR and XMM8-15
-def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
 def CSR_32_RegCall       : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
                                            (sequence "XMM%u", 4, 7))>;
 def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, ECX)>;
 def CSR_Win32_CFGuard_Check       : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>;
-def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
                                               (sequence "R%u", 10, 15))>;
 def CSR_Win64_RegCall       : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,                                  
                                               (sequence "XMM%u", 8, 15))>;
-def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
                                                (sequence "R%u", 12, 15))>;
 def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,               
                                                (sequence "XMM%u", 8, 15))>;
-                                               
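
For readers less familiar with the TableGen set operators used in
X86CallingConv.td, here is a purely illustrative definition (hypothetical names,
not part of this patch) mirroring the SysV regcall lists above: "add" unions
individual registers and previously defined sets, "sub" removes members, and
"sequence" expands a printf-style register-name pattern over an inclusive range.
The stack pointer is deliberately absent; frame lowering manages it explicitly.

  // Illustrative only: a SysV-style regcall CSR list without the stack pointer.
  def CSR_Example_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
                                                  (sequence "R%u", 12, 15))>;
  // The SSE variant adds XMM8..XMM15 on top of the GPR-only set.
  def CSR_Example_RegCall       : CalleeSavedRegs<(add CSR_Example_RegCall_NoSSE,
                                                  (sequence "XMM%u", 8, 15))>;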

diff  --git a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
index 799747b2aba6..ffe5ffb64af1 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
@@ -283,8 +283,7 @@ declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
 define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)  {
 ; X32-LABEL: test_argv32i1:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $72, %esp
+; X32-NEXT:    subl $76, %esp
 ; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -303,8 +302,7 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
-; X32-NEXT:    addl $72, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $76, %esp
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -316,10 +314,8 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ; WIN64-NEXT:    .seh_pushreg %r11
 ; WIN64-NEXT:    pushq %r10
 ; WIN64-NEXT:    .seh_pushreg %r10
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $152, %rsp
-; WIN64-NEXT:    .seh_stackalloc 152
+; WIN64-NEXT:    subq $128, %rsp
+; WIN64-NEXT:    .seh_stackalloc 128
 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
 ; WIN64-NEXT:    .seh_setframe %rbp, 128
 ; WIN64-NEXT:    .seh_endprologue
@@ -339,8 +335,7 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ; WIN64-NEXT:    vzeroupper
 ; WIN64-NEXT:    callq test_argv32i1helper
 ; WIN64-NEXT:    nop
-; WIN64-NEXT:    leaq 24(%rbp), %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    movq %rbp, %rsp
 ; WIN64-NEXT:    popq %r10
 ; WIN64-NEXT:    popq %r11
 ; WIN64-NEXT:    popq %rbp
@@ -349,9 +344,7 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ;
 ; LINUXOSX64-LABEL: test_argv32i1:
 ; LINUXOSX64:       # %bb.0: # %entry
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $128, %rsp
+; LINUXOSX64-NEXT:    subq $136, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -361,7 +354,6 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
@@ -388,9 +380,7 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1,
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $128, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $136, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    vzeroupper
 ; LINUXOSX64-NEXT:    retq
@@ -518,8 +508,7 @@ declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
 define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)  {
 ; X32-LABEL: test_argv16i1:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $72, %esp
+; X32-NEXT:    subl $76, %esp
 ; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -539,8 +528,7 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
-; X32-NEXT:    addl $72, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $76, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_argv16i1:
@@ -549,10 +537,8 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ; WIN64-NEXT:    .seh_pushreg %r11
 ; WIN64-NEXT:    pushq %r10
 ; WIN64-NEXT:    .seh_pushreg %r10
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $80, %rsp
-; WIN64-NEXT:    .seh_stackalloc 80
+; WIN64-NEXT:    subq $88, %rsp
+; WIN64-NEXT:    .seh_stackalloc 88
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    kmovd %edx, %k0
 ; WIN64-NEXT:    kmovd %eax, %k1
@@ -569,8 +555,7 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ; WIN64-NEXT:    vzeroupper
 ; WIN64-NEXT:    callq test_argv16i1helper
 ; WIN64-NEXT:    nop
-; WIN64-NEXT:    addq $80, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $88, %rsp
 ; WIN64-NEXT:    popq %r10
 ; WIN64-NEXT:    popq %r11
 ; WIN64-NEXT:    retq
@@ -578,9 +563,7 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ;
 ; LINUXOSX64-LABEL: test_argv16i1:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $128, %rsp
+; LINUXOSX64-NEXT:    subq $136, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -590,7 +573,6 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
@@ -618,9 +600,7 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1,
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $128, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $136, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
@@ -752,8 +732,7 @@ declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
 define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)  {
 ; X32-LABEL: test_argv8i1:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $72, %esp
+; X32-NEXT:    subl $76, %esp
 ; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -773,8 +752,7 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
-; X32-NEXT:    addl $72, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $76, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_argv8i1:
@@ -783,10 +761,8 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ; WIN64-NEXT:    .seh_pushreg %r11
 ; WIN64-NEXT:    pushq %r10
 ; WIN64-NEXT:    .seh_pushreg %r10
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $80, %rsp
-; WIN64-NEXT:    .seh_stackalloc 80
+; WIN64-NEXT:    subq $88, %rsp
+; WIN64-NEXT:    .seh_stackalloc 88
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    kmovd %edx, %k0
 ; WIN64-NEXT:    kmovd %eax, %k1
@@ -803,8 +779,7 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ; WIN64-NEXT:    vzeroupper
 ; WIN64-NEXT:    callq test_argv8i1helper
 ; WIN64-NEXT:    nop
-; WIN64-NEXT:    addq $80, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $88, %rsp
 ; WIN64-NEXT:    popq %r10
 ; WIN64-NEXT:    popq %r11
 ; WIN64-NEXT:    retq
@@ -812,9 +787,7 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ;
 ; LINUXOSX64-LABEL: test_argv8i1:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $128, %rsp
+; LINUXOSX64-NEXT:    subq $136, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -824,7 +797,6 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
@@ -852,9 +824,7 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $128, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $136, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)

diff  --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 19a5b7fbd938..bc15e33816e6 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -30,37 +30,34 @@ define dso_local x86_regcallcc i1 @test_argReti1(i1 %a)  {
 define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
 ; X32-LABEL: test_CallargReti1:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    incb %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    calll _test_argReti1
 ; X32-NEXT:    incb %al
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargReti1:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incb %al
 ; WIN64-NEXT:    movzbl %al, %eax
 ; WIN64-NEXT:    callq test_argReti1
 ; WIN64-NEXT:    incb %al
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargReti1:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incb %al
 ; LINUXOSX64-NEXT:    movzbl %al, %eax
 ; LINUXOSX64-NEXT:    callq test_argReti1
 ; LINUXOSX64-NEXT:    incb %al
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i1 %a, 1
@@ -96,37 +93,34 @@ define dso_local x86_regcallcc i8 @test_argReti8(i8 %a)  {
 define dso_local x86_regcallcc i8 @test_CallargReti8(i8 %a)  {
 ; X32-LABEL: test_CallargReti8:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    incb %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    calll _test_argReti8
 ; X32-NEXT:    incb %al
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargReti8:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incb %al
 ; WIN64-NEXT:    movzbl %al, %eax
 ; WIN64-NEXT:    callq test_argReti8
 ; WIN64-NEXT:    incb %al
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargReti8:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incb %al
 ; LINUXOSX64-NEXT:    movzbl %al, %eax
 ; LINUXOSX64-NEXT:    callq test_argReti8
 ; LINUXOSX64-NEXT:    incb %al
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i8 %a, 1
@@ -162,40 +156,37 @@ define dso_local x86_regcallcc i16 @test_argReti16(i16 %a)  {
 define dso_local x86_regcallcc i16 @test_CallargReti16(i16 %a)  {
 ; X32-LABEL: test_CallargReti16:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    incl %eax
 ; X32-NEXT:    calll _test_argReti16
 ; X32-NEXT:    # kill: def $ax killed $ax def $eax
 ; X32-NEXT:    incl %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargReti16:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incl %eax
 ; WIN64-NEXT:    callq test_argReti16
 ; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
 ; WIN64-NEXT:    incl %eax
 ; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargReti16:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    callq test_argReti16
 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i16 %a, 1
@@ -228,34 +219,31 @@ define dso_local x86_regcallcc i32 @test_argReti32(i32 %a)  {
 define dso_local x86_regcallcc i32 @test_CallargReti32(i32 %a)  {
 ; X32-LABEL: test_CallargReti32:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    incl %eax
 ; X32-NEXT:    calll _test_argReti32
 ; X32-NEXT:    incl %eax
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargReti32:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incl %eax
 ; WIN64-NEXT:    callq test_argReti32
 ; WIN64-NEXT:    incl %eax
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargReti32:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    callq test_argReti32
 ; LINUXOSX64-NEXT:    incl %eax
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i32 %a, 1
@@ -291,36 +279,33 @@ define dso_local x86_regcallcc i64 @test_argReti64(i64 %a)  {
 define dso_local x86_regcallcc i64 @test_CallargReti64(i64 %a)  {
 ; X32-LABEL: test_CallargReti64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    addl $1, %eax
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    calll _test_argReti64
 ; X32-NEXT:    addl $1, %eax
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargReti64:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incq %rax
 ; WIN64-NEXT:    callq test_argReti64
 ; WIN64-NEXT:    incq %rax
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargReti64:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incq %rax
 ; LINUXOSX64-NEXT:    callq test_argReti64
 ; LINUXOSX64-NEXT:    incq %rax
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i64 %a, 1
@@ -353,24 +338,20 @@ define dso_local x86_regcallcc float @test_argRetFloat(float %a)  {
 define dso_local x86_regcallcc float @test_CallargRetFloat(float %a)  {
 ; X32-LABEL: test_CallargRetFloat:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $24, %esp
+; X32-NEXT:    subl $28, %esp
 ; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
 ; X32-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
 ; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
 ; X32-NEXT:    calll _test_argRetFloat
 ; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
 ; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
-; X32-NEXT:    addl $24, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRetFloat:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $16, %rsp
-; WIN64-NEXT:    .seh_stackalloc 16
+; WIN64-NEXT:    subq $24, %rsp
+; WIN64-NEXT:    .seh_stackalloc 24
 ; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; WIN64-NEXT:    .seh_savexmm %xmm8, 0
 ; WIN64-NEXT:    .seh_endprologue
@@ -379,28 +360,22 @@ define dso_local x86_regcallcc float @test_CallargRetFloat(float %a)  {
 ; WIN64-NEXT:    callq test_argRetFloat
 ; WIN64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
 ; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
-; WIN64-NEXT:    addq $16, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $24, %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRetFloat:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $16, %rsp
+; LINUXOSX64-NEXT:    subq $24, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
 ; LINUXOSX64-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
 ; LINUXOSX64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
 ; LINUXOSX64-NEXT:    callq test_argRetFloat
 ; LINUXOSX64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $16, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $24, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd float 1.0, %a
@@ -433,24 +408,20 @@ define dso_local x86_regcallcc double @test_argRetDouble(double %a)  {
 define dso_local x86_regcallcc double @test_CallargRetDouble(double %a)  {
 ; X32-LABEL: test_CallargRetDouble:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $24, %esp
+; X32-NEXT:    subl $28, %esp
 ; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
 ; X32-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
 ; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
 ; X32-NEXT:    calll _test_argRetDouble
 ; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
 ; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
-; X32-NEXT:    addl $24, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRetDouble:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $16, %rsp
-; WIN64-NEXT:    .seh_stackalloc 16
+; WIN64-NEXT:    subq $24, %rsp
+; WIN64-NEXT:    .seh_stackalloc 24
 ; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; WIN64-NEXT:    .seh_savexmm %xmm8, 0
 ; WIN64-NEXT:    .seh_endprologue
@@ -459,28 +430,22 @@ define dso_local x86_regcallcc double @test_CallargRetDouble(double %a)  {
 ; WIN64-NEXT:    callq test_argRetDouble
 ; WIN64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
 ; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
-; WIN64-NEXT:    addq $16, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $24, %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRetDouble:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $16, %rsp
+; LINUXOSX64-NEXT:    subq $24, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
 ; LINUXOSX64-NEXT:    vmovsd {{.*#+}} xmm8 = mem[0],zero
 ; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
 ; LINUXOSX64-NEXT:    callq test_argRetDouble
 ; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $16, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $24, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd double 1.0, %a
@@ -544,34 +509,31 @@ define dso_local x86_regcallcc double @test_argParamf80(x86_fp80 %a0) nounwind {
 define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
 ; X32-LABEL: test_CallargRetf80:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    fadd %st, %st(0)
 ; X32-NEXT:    calll _test_argRetf80
 ; X32-NEXT:    fadd %st, %st(0)
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRetf80:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    fadd %st, %st(0)
 ; WIN64-NEXT:    callq test_argRetf80
 ; WIN64-NEXT:    fadd %st, %st(0)
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rax
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRetf80:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    fadd %st, %st(0)
 ; LINUXOSX64-NEXT:    callq test_argRetf80
 ; LINUXOSX64-NEXT:    fadd %st, %st(0)
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd x86_fp80 %a, %a
@@ -583,34 +545,31 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
 define dso_local x86_regcallcc double @test_CallargParamf80(x86_fp80 %a)  {
 ; X32-LABEL: test_CallargParamf80:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    fadd %st, %st(0)
 ; X32-NEXT:    calll _test_argParamf80
 ; X32-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargParamf80:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    fadd %st, %st(0)
 ; WIN64-NEXT:    callq test_argParamf80
 ; WIN64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rax
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargParamf80:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    fadd %st, %st(0)
 ; LINUXOSX64-NEXT:    callq test_argParamf80
 ; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd x86_fp80 %a, %a
@@ -645,34 +604,31 @@ define dso_local x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a)  {
 define dso_local x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a)  {
 ; X32-LABEL: test_CallargRetPointer:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
 ; X32-NEXT:    incl %eax
 ; X32-NEXT:    calll _test_argRetPointer
 ; X32-NEXT:    incl %eax
-; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRetPointer:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
+; WIN64-NEXT:    pushq %rax
+; WIN64-NEXT:    .seh_stackalloc 8
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    incl %eax
 ; WIN64-NEXT:    callq test_argRetPointer
 ; WIN64-NEXT:    incl %eax
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    popq %rcx
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRetPointer:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
+; LINUXOSX64-NEXT:    pushq %rax
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    callq test_argRetPointer
 ; LINUXOSX64-NEXT:    incl %eax
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    popq %rcx
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = ptrtoint [4 x i32]* %a to i32
@@ -715,8 +671,7 @@ define dso_local x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x
 define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a)  {
 ; X32-LABEL: test_CallargRet128Vector:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $40, %esp
+; X32-NEXT:    subl $44, %esp
 ; X32-NEXT:    vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32-NEXT:    vmovdqa %xmm1, %xmm4
 ; X32-NEXT:    vpslld $31, %xmm0, %xmm1
@@ -728,16 +683,13 @@ define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x,
 ; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X32-NEXT:    vmovdqa32 %xmm4, %xmm0 {%k1}
 ; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload
-; X32-NEXT:    addl $40, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $44, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRet128Vector:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $32, %rsp
-; WIN64-NEXT:    .seh_stackalloc 32
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    .seh_stackalloc 40
 ; WIN64-NEXT:    vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; WIN64-NEXT:    .seh_savexmm %xmm8, 16
 ; WIN64-NEXT:    .seh_endprologue
@@ -751,19 +703,15 @@ define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x,
 ; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; WIN64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
 ; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
-; WIN64-NEXT:    addq $32, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $40, %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRet128Vector:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $32, %rsp
+; LINUXOSX64-NEXT:    subq $40, %rsp
 ; LINUXOSX64-NEXT:    vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
 ; LINUXOSX64-NEXT:    vmovdqa %xmm1, %xmm8
 ; LINUXOSX64-NEXT:    vpslld $31, %xmm0, %xmm1
@@ -775,9 +723,7 @@ define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x,
 ; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
 ; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT:    addq $32, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $40, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a)
@@ -812,8 +758,7 @@ define dso_local x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x
 define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a)  {
 ; X32-LABEL: test_CallargRet256Vector:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $88, %esp
+; X32-NEXT:    subl $92, %esp
 ; X32-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
 ; X32-NEXT:    kmovd %eax, %k1
 ; X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
@@ -822,16 +767,13 @@ define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x,
 ; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X32-NEXT:    vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload
 ; X32-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
-; X32-NEXT:    addl $88, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $92, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRet256Vector:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $80, %rsp
-; WIN64-NEXT:    .seh_stackalloc 80
+; WIN64-NEXT:    subq $88, %rsp
+; WIN64-NEXT:    .seh_stackalloc 88
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; WIN64-NEXT:    kmovd %eax, %k1
@@ -841,18 +783,14 @@ define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x,
 ; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; WIN64-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
 ; WIN64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
-; WIN64-NEXT:    addq $80, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $88, %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRet256Vector:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $80, %rsp
+; LINUXOSX64-NEXT:    subq $88, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 96
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; LINUXOSX64-NEXT:    kmovd %eax, %k1
 ; LINUXOSX64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
@@ -861,9 +799,7 @@ define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x,
 ; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
-; LINUXOSX64-NEXT:    addq $80, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $88, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a)
@@ -898,8 +834,7 @@ define dso_local x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <1
 define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a)  {
 ; X32-LABEL: test_CallargRet512Vector:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %esp
-; X32-NEXT:    subl $184, %esp
+; X32-NEXT:    subl $188, %esp
 ; X32-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
 ; X32-NEXT:    kmovd %eax, %k1
 ; X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
@@ -908,16 +843,13 @@ define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x
 ; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X32-NEXT:    vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload
 ; X32-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
-; X32-NEXT:    addl $184, %esp
-; X32-NEXT:    popl %esp
+; X32-NEXT:    addl $188, %esp
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_CallargRet512Vector:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rsp
-; WIN64-NEXT:    .seh_pushreg %rsp
-; WIN64-NEXT:    subq $176, %rsp
-; WIN64-NEXT:    .seh_stackalloc 176
+; WIN64-NEXT:    subq $184, %rsp
+; WIN64-NEXT:    .seh_stackalloc 184
 ; WIN64-NEXT:    .seh_endprologue
 ; WIN64-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
 ; WIN64-NEXT:    kmovd %eax, %k1
@@ -927,18 +859,14 @@ define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x
 ; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; WIN64-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
 ; WIN64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
-; WIN64-NEXT:    addq $176, %rsp
-; WIN64-NEXT:    popq %rsp
+; WIN64-NEXT:    addq $184, %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_endproc
 ;
 ; LINUXOSX64-LABEL: test_CallargRet512Vector:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    pushq %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    subq $176, %rsp
+; LINUXOSX64-NEXT:    subq $184, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 192
-; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
 ; LINUXOSX64-NEXT:    kmovd %eax, %k1
 ; LINUXOSX64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
@@ -947,9 +875,7 @@ define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x
 ; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
-; LINUXOSX64-NEXT:    addq $176, %rsp
-; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
-; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    addq $184, %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a)

diff  --git a/llvm/test/CodeGen/X86/dynamic-regmask.ll b/llvm/test/CodeGen/X86/dynamic-regmask.ll
index 6280eac13ac0..dae96f8e4251 100644
--- a/llvm/test/CodeGen/X86/dynamic-regmask.ll
+++ b/llvm/test/CodeGen/X86/dynamic-regmask.ll
@@ -11,7 +11,7 @@ define i32 @caller(i32 %a0) nounwind {
   ret i32 %b2
 }
 ; CHECK:    name: caller
-; CHECK:    CALL64pcrel32 @callee, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ebp,$ebx,$esp,$hbp,$hbx,$hsp,$rbp,$rbx,$rsp,$sp,$sph,$spl,$r10,$r11,$r12,$r13,$r14,$r15,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r10b,$r11b,$r12b,$r13b,$r14b,$r15b,$r10bh,$r11bh,$r12bh,$r13bh,$r14bh,$r15bh,$r10d,$r11d,$r12d,$r13d,$r14d,$r15d,$r10w,$r11w,$r12w,$r13w,$r14w,$r15w,$r10wh,$r11wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $eax, implicit $ecx, implicit $edx, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+; CHECK:    CALL64pcrel32 @callee, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ebp,$ebx,$hbp,$hbx,$rbp,$rbx,$r10,$r11,$r12,$r13,$r14,$r15,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r10b,$r11b,$r12b,$r13b,$r14b,$r15b,$r10bh,$r11bh,$r12bh,$r13bh,$r14bh,$r15bh,$r10d,$r11d,$r12d,$r13d,$r14d,$r15d,$r10w,$r11w,$r12w,$r13w,$r14w,$r15w,$r10wh,$r11wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $eax, implicit $ecx, implicit $edx, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
 ; CHECK:    RET 0, $eax
 
 define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
@@ -24,7 +24,7 @@ define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32
   ret {i32, i32, i32} %b6
 }
 ; CHECK: name:            test_callee
-; CHECK: calleeSavedRegisters: [ '$rbx', '$rbp', '$rsp', '$r10', '$r11', '$r12',
-; CHECK:                         '$r13', '$r14', '$r15', '$xmm8', '$xmm9', '$xmm10',
-; CHECK:                         '$xmm11', '$xmm12', '$xmm13', '$xmm14', '$xmm15' ]
+; CHECK: calleeSavedRegisters: [ '$rbx', '$rbp', '$r10', '$r11', '$r12', '$r13',
+; CHECK:                         '$r14', '$r15', '$xmm8', '$xmm9', '$xmm10', '$xmm11',
+; CHECK:                         '$xmm12', '$xmm13', '$xmm14', '$xmm15' ]
 ; CHECK: RET 0, $eax, $ecx, $edx

diff  --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index cc67c195be63..51a61aae7b30 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $gs $hip $hsp $ip $mxcsr $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
   call void @bar1()
   call void @bar2()
   ret void


        

