[llvm] r324530 - [X86] Auto-generate complete checks. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 7 13:29:30 PST 2018
Author: ctopper
Date: Wed Feb 7 13:29:30 2018
New Revision: 324530
URL: http://llvm.org/viewvc/llvm-project?rev=324530&view=rev
Log:
[X86] Auto-generate complete checks. NFC
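
For reference, checks like these come from the script named in the NOTE line
of each updated test. A minimal sketch of regenerating them, assuming a built
llc is on PATH and the command is run from the llvm source root:

  utils/update_llc_test_checks.py test/CodeGen/X86/avx512-intel-ocl.ll \
      test/CodeGen/X86/x86-interrupt_cc.ll

The script executes each RUN line and rewrites the CHECK lines in place,
which is why the hand-written spot checks below are replaced by exhaustive
per-prefix -NEXT assertions for every function body.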
Modified:
llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll
Modified: llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll?rev=324530&r1=324529&r2=324530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll Wed Feb 7 13:29:30 2018
@@ -1,33 +1,82 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck -check-prefix=WIN64 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck -check-prefix=X64 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32 -check-prefix=X32-KNL
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32 -check-prefix=X32-SKX
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-KNL
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-SKX
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-KNL
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X64 -check-prefix=X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X64 -check-prefix=X64-SKX
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
declare i32 @func_int(i32, i32)
-; WIN64-LABEL: testf16_inp
-; WIN64: vaddps {{.*}}, {{%zmm[0-1]}}
-; WIN64: leaq {{.*}}(%rsp), %rcx
-; WIN64: call
-; WIN64: ret
-
-; X32-LABEL: testf16_inp
-; X32: vaddps {{.*}}, {{%zmm[0-1]}}
-; Push is not deemed profitable if we're realigning the stack.
-; X32: {{pushl|movl}} %eax
-; X32: call
-; X32: ret
-
-; X64-LABEL: testf16_inp
-; X64: vaddps {{.*}}, {{%zmm[0-1]}}
-; X64: movq %rsp, %rdi
-; X64: call
-; X64: ret
-
;test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: testf16_inp:
+; X32: ## %bb.0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-64, %esp
+; X32-NEXT: subl $192, %esp
+; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll _func_float16_ptr
+; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; WIN32-LABEL: testf16_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-64, %esp
+; WIN32-NEXT: subl $128, %esp
+; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; WIN32-NEXT: movl %esp, %eax
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: calll _func_float16_ptr
+; WIN32-NEXT: addl $4, %esp
+; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf16_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: subq $176, %rsp
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: andq $-64, %rsp
+; WIN64-NEXT: vmovaps (%rcx), %zmm0
+; WIN64-NEXT: vaddps (%rdx), %zmm0, %zmm0
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: callq func_float16_ptr
+; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
+; WIN64-NEXT: leaq 48(%rbp), %rsp
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: retq
+;
+; X64-LABEL: testf16_inp:
+; X64: ## %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: callq _func_float16_ptr
+; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-NEXT: leaq -16(%rbp), %rsp
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
@@ -38,19 +87,77 @@ define <16 x float> @testf16_inp(<16 x f
;test calling conventions - preserved registers
-; preserved zmm16-
-; WIN64-LABEL: testf16_regs
-; WIN64: call
-; WIN64: vaddps %zmm16, %zmm0, %zmm0
-; WIN64: ret
-
-; preserved zmm16-
-; X64-LABEL: testf16_regs
-; X64: call
-; X64: vaddps %zmm16, %zmm0, %zmm0
-; X64: ret
-
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: testf16_regs:
+; X32: ## %bb.0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-64, %esp
+; X32-NEXT: subl $256, %esp ## imm = 0x100
+; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill
+; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll _func_float16_ptr
+; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0 ## 64-byte Folded Reload
+; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; WIN32-LABEL: testf16_regs:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-64, %esp
+; WIN32-NEXT: subl $192, %esp
+; WIN32-NEXT: vmovaps %zmm1, (%esp) # 64-byte Spill
+; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: calll _func_float16_ptr
+; WIN32-NEXT: addl $4, %esp
+; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0 # 64-byte Folded Reload
+; WIN32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf16_regs:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: subq $176, %rsp
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: andq $-64, %rsp
+; WIN64-NEXT: vmovaps (%rdx), %zmm16
+; WIN64-NEXT: vaddps (%rcx), %zmm16, %zmm0
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: callq func_float16_ptr
+; WIN64-NEXT: vaddps %zmm16, %zmm0, %zmm0
+; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
+; WIN64-NEXT: leaq 48(%rbp), %rsp
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: retq
+;
+; X64-LABEL: testf16_regs:
+; X64: ## %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: vmovaps %zmm1, %zmm16
+; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: callq _func_float16_ptr
+; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
+; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-NEXT: leaq -16(%rbp), %rsp
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
@@ -61,24 +168,124 @@ define <16 x float> @testf16_regs(<16 x
}
; test calling conventions - prolog and epilog
-; WIN64-LABEL: test_prolog_epilog
-; WIN64: vmovaps %zmm21, {{.*(%rbp).*}} # 64-byte Spill
-; WIN64: vmovaps %zmm6, {{.*(%rbp).*}} # 64-byte Spill
-; WIN64: call
-; WIN64: vmovaps {{.*(%rbp).*}}, %zmm6 # 64-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, %zmm21 # 64-byte Reload
-
-; X64-LABEL: test_prolog_epilog
-; X64: kmovq %k7, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k6, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k5, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k4, {{.*}}(%rsp) ## 8-byte Spill
-; X64: vmovups %zmm31, {{.*}}(%rsp) ## 64-byte Spill
-; X64: vmovups %zmm16, {{.*}}(%rsp) ## 64-byte Spill
-; X64: call
-; X64: vmovups {{.*}}(%rsp), %zmm16 ## 64-byte Reload
-; X64: vmovups {{.*}}(%rsp), %zmm31 ## 64-byte Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: test_prolog_epilog:
+; X32: ## %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: calll _func_float16
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+;
+; WIN32-LABEL: test_prolog_epilog:
+; WIN32: # %bb.0:
+; WIN32-NEXT: calll _func_float16
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_prolog_epilog:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: subq $1328, %rsp # imm = 0x530
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: kmovq %k7, 1192(%rbp) # 8-byte Spill
+; WIN64-NEXT: kmovq %k6, 1184(%rbp) # 8-byte Spill
+; WIN64-NEXT: kmovq %k5, 1176(%rbp) # 8-byte Spill
+; WIN64-NEXT: kmovq %k4, 1168(%rbp) # 8-byte Spill
+; WIN64-NEXT: vmovaps %zmm21, 1056(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill
+; WIN64-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill
+; WIN64-NEXT: andq $-64, %rsp
+; WIN64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: callq func_float16
+; WIN64-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload
+; WIN64-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload
+; WIN64-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload
+; WIN64-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload
+; WIN64-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload
+; WIN64-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload
+; WIN64-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload
+; WIN64-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload
+; WIN64-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload
+; WIN64-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload
+; WIN64-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload
+; WIN64-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload
+; WIN64-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload
+; WIN64-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload
+; WIN64-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload
+; WIN64-NEXT: vmovaps 1056(%rbp), %zmm21 # 64-byte Reload
+; WIN64-NEXT: kmovq 1168(%rbp), %k4 # 8-byte Reload
+; WIN64-NEXT: kmovq 1176(%rbp), %k5 # 8-byte Reload
+; WIN64-NEXT: kmovq 1184(%rbp), %k6 # 8-byte Reload
+; WIN64-NEXT: kmovq 1192(%rbp), %k7 # 8-byte Reload
+; WIN64-NEXT: leaq 1200(%rbp), %rsp
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: retq
+;
+; X64-LABEL: test_prolog_epilog:
+; X64: ## %bb.0:
+; X64-NEXT: pushq %rsi
+; X64-NEXT: pushq %rdi
+; X64-NEXT: subq $1192, %rsp ## imm = 0x4A8
+; X64-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-NEXT: callq _func_float16
+; X64-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; X64-NEXT: addq $1192, %rsp ## imm = 0x4A8
+; X64-NEXT: popq %rdi
+; X64-NEXT: popq %rsi
+; X64-NEXT: retq
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
}
@@ -86,19 +293,126 @@ define intel_ocl_bicc <16 x float> @test
declare <16 x float> @func_float16_mask(<16 x float>, <16 x i1>)
-; X64-LABEL: testf16_inp_mask
-; X64: kmovw %edi, %k1
-; X64: call
define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
+; X32-LABEL: testf16_inp_mask:
+; X32: ## %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: calll _func_float16_mask
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+;
+; WIN32-LABEL: testf16_inp_mask:
+; WIN32: # %bb.0:
+; WIN32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; WIN32-NEXT: calll _func_float16_mask
+; WIN32-NEXT: retl
+;
+; WIN64-KNL-LABEL: testf16_inp_mask:
+; WIN64-KNL: # %bb.0:
+; WIN64-KNL-NEXT: subq $40, %rsp
+; WIN64-KNL-NEXT: .seh_stackalloc 40
+; WIN64-KNL-NEXT: .seh_endprologue
+; WIN64-KNL-NEXT: vmovaps (%rcx), %zmm0
+; WIN64-KNL-NEXT: kmovw %edx, %k1
+; WIN64-KNL-NEXT: callq func_float16_mask
+; WIN64-KNL-NEXT: nop
+; WIN64-KNL-NEXT: addq $40, %rsp
+; WIN64-KNL-NEXT: retq
+; WIN64-KNL-NEXT: .seh_handlerdata
+; WIN64-KNL-NEXT: .text
+; WIN64-KNL-NEXT: .seh_endproc
+;
+; WIN64-SKX-LABEL: testf16_inp_mask:
+; WIN64-SKX: # %bb.0:
+; WIN64-SKX-NEXT: subq $40, %rsp
+; WIN64-SKX-NEXT: .seh_stackalloc 40
+; WIN64-SKX-NEXT: .seh_endprologue
+; WIN64-SKX-NEXT: vmovaps (%rcx), %zmm0
+; WIN64-SKX-NEXT: kmovd %edx, %k1
+; WIN64-SKX-NEXT: callq func_float16_mask
+; WIN64-SKX-NEXT: nop
+; WIN64-SKX-NEXT: addq $40, %rsp
+; WIN64-SKX-NEXT: retq
+; WIN64-SKX-NEXT: .seh_handlerdata
+; WIN64-SKX-NEXT: .text
+; WIN64-SKX-NEXT: .seh_endproc
+;
+; X64-KNL-LABEL: testf16_inp_mask:
+; X64-KNL: ## %bb.0:
+; X64-KNL-NEXT: pushq %rbp
+; X64-KNL-NEXT: .cfi_def_cfa_offset 16
+; X64-KNL-NEXT: pushq %r13
+; X64-KNL-NEXT: .cfi_def_cfa_offset 24
+; X64-KNL-NEXT: pushq %r12
+; X64-KNL-NEXT: .cfi_def_cfa_offset 32
+; X64-KNL-NEXT: .cfi_offset %r12, -32
+; X64-KNL-NEXT: .cfi_offset %r13, -24
+; X64-KNL-NEXT: .cfi_offset %rbp, -16
+; X64-KNL-NEXT: kmovw %edi, %k1
+; X64-KNL-NEXT: callq _func_float16_mask
+; X64-KNL-NEXT: popq %r12
+; X64-KNL-NEXT: popq %r13
+; X64-KNL-NEXT: popq %rbp
+; X64-KNL-NEXT: retq
+;
+; X64-SKX-LABEL: testf16_inp_mask:
+; X64-SKX: ## %bb.0:
+; X64-SKX-NEXT: pushq %rbp
+; X64-SKX-NEXT: .cfi_def_cfa_offset 16
+; X64-SKX-NEXT: pushq %r13
+; X64-SKX-NEXT: .cfi_def_cfa_offset 24
+; X64-SKX-NEXT: pushq %r12
+; X64-SKX-NEXT: .cfi_def_cfa_offset 32
+; X64-SKX-NEXT: .cfi_offset %r12, -32
+; X64-SKX-NEXT: .cfi_offset %r13, -24
+; X64-SKX-NEXT: .cfi_offset %rbp, -16
+; X64-SKX-NEXT: kmovd %edi, %k1
+; X64-SKX-NEXT: callq _func_float16_mask
+; X64-SKX-NEXT: popq %r12
+; X64-SKX-NEXT: popq %r13
+; X64-SKX-NEXT: popq %rbp
+; X64-SKX-NEXT: retq
%imask = bitcast i16 %mask to <16 x i1>
%1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
ret <16 x float> %1
}
-; X64-LABEL: test_prolog_epilog_with_mask
-; X64: kxorw %k{{.*}}, %k{{.*}}, %k1
-; X64: call
define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a, <16 x i32> %x1, <16 x i32>%x2, <16 x i1> %mask) nounwind {
+; X32-LABEL: test_prolog_epilog_with_mask:
+; X32: ## %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
+; X32-NEXT: kxorw %k1, %k0, %k1
+; X32-NEXT: calll _func_float16_mask
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+;
+; WIN32-LABEL: test_prolog_epilog_with_mask:
+; WIN32: # %bb.0:
+; WIN32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
+; WIN32-NEXT: kxorw %k1, %k0, %k1
+; WIN32-NEXT: calll _func_float16_mask
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_prolog_epilog_with_mask:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp
+; WIN64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
+; WIN64-NEXT: kxorw %k1, %k0, %k1
+; WIN64-NEXT: callq func_float16_mask
+; WIN64-NEXT: addq $40, %rsp
+; WIN64-NEXT: retq
+;
+; X64-LABEL: test_prolog_epilog_with_mask:
+; X64: ## %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
+; X64-NEXT: kxorw %k1, %k0, %k1
+; X64-NEXT: callq _func_float16_mask
+; X64-NEXT: popq %rax
+; X64-NEXT: retq
%cmp_res = icmp eq <16 x i32>%x1, %x2
%mask1 = xor <16 x i1> %cmp_res, %mask
%c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
Modified: llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll?rev=324530&r1=324529&r2=324530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll Wed Feb 7 13:29:30 2018
@@ -1,30 +1,717 @@
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mattr=+avx512f < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64
-; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mattr=+avx512f < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK64 -check-prefix=CHECK64-KNL
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK64 -check-prefix=CHECK64-SKX
+; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK32 -check-prefix=CHECK32-KNL
+; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK32 -check-prefix=CHECK32-SKX
; Make sure we spill the high numbered zmm registers and K registers with the right encoding.
-; CHECK-LABEL: foo
-; CHECK: kmovq %k7, {{.+}}
-; CHECK64: encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
-; CHECK32: encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
-; k6 is used as an anchor for the previous regexp.
-; CHECK-NEXT: kmovq %k6
-
-; CHECK64: movups %zmm31, {{.+}}
-; CHECK64: encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
-; zmm30 is used as an anchor for the previous regexp.
-; CHECK64-NEXT: movups %zmm30
-
-; CHECK32-NOT: zmm31
-; CHECK32-NOT: zmm8
-; CHECK32: movups %zmm7, {{.+}}
-; CHECK32: encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
-; zmm6 is used as an anchor for the previous regexp.
-; CHECK32-NEXT: movups %zmm6
-
-; CHECK: call
-; CHECK: iret
define x86_intrcc void @foo(i8* %frame) {
+; CHECK64-KNL-LABEL: foo:
+; CHECK64-KNL: ## %bb.0:
+; CHECK64-KNL-NEXT: pushq %rax ## encoding: [0x50]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-KNL-NEXT: pushq %r11 ## encoding: [0x41,0x53]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 24
+; CHECK64-KNL-NEXT: pushq %r10 ## encoding: [0x41,0x52]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 32
+; CHECK64-KNL-NEXT: pushq %r9 ## encoding: [0x41,0x51]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 40
+; CHECK64-KNL-NEXT: pushq %r8 ## encoding: [0x41,0x50]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 48
+; CHECK64-KNL-NEXT: pushq %rdi ## encoding: [0x57]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 56
+; CHECK64-KNL-NEXT: pushq %rsi ## encoding: [0x56]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 64
+; CHECK64-KNL-NEXT: pushq %rdx ## encoding: [0x52]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 72
+; CHECK64-KNL-NEXT: pushq %rcx ## encoding: [0x51]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 80
+; CHECK64-KNL-NEXT: subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: ## imm = 0x870
+; CHECK64-KNL-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-KNL-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
+; CHECK64-KNL-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
+; CHECK64-KNL-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
+; CHECK64-KNL-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
+; CHECK64-KNL-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
+; CHECK64-KNL-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
+; CHECK64-KNL-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
+; CHECK64-KNL-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
+; CHECK64-KNL-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
+; CHECK64-KNL-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
+; CHECK64-KNL-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
+; CHECK64-KNL-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
+; CHECK64-KNL-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
+; CHECK64-KNL-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
+; CHECK64-KNL-NEXT: vmovups %zmm16, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
+; CHECK64-KNL-NEXT: vmovups %zmm15, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
+; CHECK64-KNL-NEXT: vmovups %zmm14, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
+; CHECK64-KNL-NEXT: vmovups %zmm13, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
+; CHECK64-KNL-NEXT: vmovups %zmm12, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
+; CHECK64-KNL-NEXT: vmovups %zmm11, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
+; CHECK64-KNL-NEXT: vmovups %zmm10, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
+; CHECK64-KNL-NEXT: vmovups %zmm9, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
+; CHECK64-KNL-NEXT: vmovups %zmm8, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
+; CHECK64-KNL-NEXT: vmovups %zmm7, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK64-KNL-NEXT: vmovups %zmm6, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK64-KNL-NEXT: vmovups %zmm5, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK64-KNL-NEXT: vmovups %zmm4, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK64-KNL-NEXT: vmovups %zmm3, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK64-KNL-NEXT: vmovups %zmm2, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK64-KNL-NEXT: vmovups %zmm1, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK64-KNL-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 2240
+; CHECK64-KNL-NEXT: .cfi_offset %rcx, -80
+; CHECK64-KNL-NEXT: .cfi_offset %rdx, -72
+; CHECK64-KNL-NEXT: .cfi_offset %rsi, -64
+; CHECK64-KNL-NEXT: .cfi_offset %rdi, -56
+; CHECK64-KNL-NEXT: .cfi_offset %r8, -48
+; CHECK64-KNL-NEXT: .cfi_offset %r9, -40
+; CHECK64-KNL-NEXT: .cfi_offset %r10, -32
+; CHECK64-KNL-NEXT: .cfi_offset %r11, -24
+; CHECK64-KNL-NEXT: .cfi_offset %rax, -16
+; CHECK64-KNL-NEXT: .cfi_offset %xmm0, -2240
+; CHECK64-KNL-NEXT: .cfi_offset %xmm1, -2176
+; CHECK64-KNL-NEXT: .cfi_offset %xmm2, -2112
+; CHECK64-KNL-NEXT: .cfi_offset %xmm3, -2048
+; CHECK64-KNL-NEXT: .cfi_offset %xmm4, -1984
+; CHECK64-KNL-NEXT: .cfi_offset %xmm5, -1920
+; CHECK64-KNL-NEXT: .cfi_offset %xmm6, -1856
+; CHECK64-KNL-NEXT: .cfi_offset %xmm7, -1792
+; CHECK64-KNL-NEXT: .cfi_offset %xmm8, -1728
+; CHECK64-KNL-NEXT: .cfi_offset %xmm9, -1664
+; CHECK64-KNL-NEXT: .cfi_offset %xmm10, -1600
+; CHECK64-KNL-NEXT: .cfi_offset %xmm11, -1536
+; CHECK64-KNL-NEXT: .cfi_offset %xmm12, -1472
+; CHECK64-KNL-NEXT: .cfi_offset %xmm13, -1408
+; CHECK64-KNL-NEXT: .cfi_offset %xmm14, -1344
+; CHECK64-KNL-NEXT: .cfi_offset %xmm15, -1280
+; CHECK64-KNL-NEXT: .cfi_offset %xmm16, -1216
+; CHECK64-KNL-NEXT: .cfi_offset %xmm17, -1152
+; CHECK64-KNL-NEXT: .cfi_offset %xmm18, -1088
+; CHECK64-KNL-NEXT: .cfi_offset %xmm19, -1024
+; CHECK64-KNL-NEXT: .cfi_offset %xmm20, -960
+; CHECK64-KNL-NEXT: .cfi_offset %xmm21, -896
+; CHECK64-KNL-NEXT: .cfi_offset %xmm22, -832
+; CHECK64-KNL-NEXT: .cfi_offset %xmm23, -768
+; CHECK64-KNL-NEXT: .cfi_offset %xmm24, -704
+; CHECK64-KNL-NEXT: .cfi_offset %xmm25, -640
+; CHECK64-KNL-NEXT: .cfi_offset %xmm26, -576
+; CHECK64-KNL-NEXT: .cfi_offset %xmm27, -512
+; CHECK64-KNL-NEXT: .cfi_offset %xmm28, -448
+; CHECK64-KNL-NEXT: .cfi_offset %xmm29, -384
+; CHECK64-KNL-NEXT: .cfi_offset %xmm30, -320
+; CHECK64-KNL-NEXT: .cfi_offset %xmm31, -224
+; CHECK64-KNL-NEXT: .cfi_offset %k0, -144
+; CHECK64-KNL-NEXT: .cfi_offset %k1, -136
+; CHECK64-KNL-NEXT: .cfi_offset %k2, -128
+; CHECK64-KNL-NEXT: .cfi_offset %k3, -120
+; CHECK64-KNL-NEXT: .cfi_offset %k4, -112
+; CHECK64-KNL-NEXT: .cfi_offset %k5, -104
+; CHECK64-KNL-NEXT: .cfi_offset %k6, -96
+; CHECK64-KNL-NEXT: .cfi_offset %k7, -88
+; CHECK64-KNL-NEXT: cld ## encoding: [0xfc]
+; CHECK64-KNL-NEXT: callq _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK64-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK64-KNL-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm1 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm2 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm3 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm4 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm5 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm6 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm7 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm8 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm9 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm10 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm11 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm12 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm13 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm14 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm15 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm16 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
+; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT: ## imm = 0x870
+; CHECK64-KNL-NEXT: popq %rcx ## encoding: [0x59]
+; CHECK64-KNL-NEXT: popq %rdx ## encoding: [0x5a]
+; CHECK64-KNL-NEXT: popq %rsi ## encoding: [0x5e]
+; CHECK64-KNL-NEXT: popq %rdi ## encoding: [0x5f]
+; CHECK64-KNL-NEXT: popq %r8 ## encoding: [0x41,0x58]
+; CHECK64-KNL-NEXT: popq %r9 ## encoding: [0x41,0x59]
+; CHECK64-KNL-NEXT: popq %r10 ## encoding: [0x41,0x5a]
+; CHECK64-KNL-NEXT: popq %r11 ## encoding: [0x41,0x5b]
+; CHECK64-KNL-NEXT: popq %rax ## encoding: [0x58]
+; CHECK64-KNL-NEXT: iretq ## encoding: [0x48,0xcf]
+;
+; CHECK64-SKX-LABEL: foo:
+; CHECK64-SKX: ## %bb.0:
+; CHECK64-SKX-NEXT: pushq %rax ## encoding: [0x50]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-SKX-NEXT: pushq %r11 ## encoding: [0x41,0x53]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 24
+; CHECK64-SKX-NEXT: pushq %r10 ## encoding: [0x41,0x52]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 32
+; CHECK64-SKX-NEXT: pushq %r9 ## encoding: [0x41,0x51]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 40
+; CHECK64-SKX-NEXT: pushq %r8 ## encoding: [0x41,0x50]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 48
+; CHECK64-SKX-NEXT: pushq %rdi ## encoding: [0x57]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 56
+; CHECK64-SKX-NEXT: pushq %rsi ## encoding: [0x56]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 64
+; CHECK64-SKX-NEXT: pushq %rdx ## encoding: [0x52]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 72
+; CHECK64-SKX-NEXT: pushq %rcx ## encoding: [0x51]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 80
+; CHECK64-SKX-NEXT: subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: ## imm = 0x870
+; CHECK64-SKX-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
+; CHECK64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
+; CHECK64-SKX-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
+; CHECK64-SKX-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
+; CHECK64-SKX-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
+; CHECK64-SKX-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
+; CHECK64-SKX-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
+; CHECK64-SKX-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
+; CHECK64-SKX-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
+; CHECK64-SKX-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
+; CHECK64-SKX-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
+; CHECK64-SKX-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
+; CHECK64-SKX-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
+; CHECK64-SKX-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
+; CHECK64-SKX-NEXT: vmovups %zmm16, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
+; CHECK64-SKX-NEXT: vmovups %zmm15, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
+; CHECK64-SKX-NEXT: vmovups %zmm14, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
+; CHECK64-SKX-NEXT: vmovups %zmm13, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
+; CHECK64-SKX-NEXT: vmovups %zmm12, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
+; CHECK64-SKX-NEXT: vmovups %zmm11, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
+; CHECK64-SKX-NEXT: vmovups %zmm10, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
+; CHECK64-SKX-NEXT: vmovups %zmm9, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
+; CHECK64-SKX-NEXT: vmovups %zmm8, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
+; CHECK64-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK64-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK64-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK64-SKX-NEXT: vmovups %zmm4, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK64-SKX-NEXT: vmovups %zmm3, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK64-SKX-NEXT: vmovups %zmm2, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK64-SKX-NEXT: vmovups %zmm1, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK64-SKX-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK64-SKX-NEXT: .cfi_def_cfa_offset 2240
+; CHECK64-SKX-NEXT: .cfi_offset %rcx, -80
+; CHECK64-SKX-NEXT: .cfi_offset %rdx, -72
+; CHECK64-SKX-NEXT: .cfi_offset %rsi, -64
+; CHECK64-SKX-NEXT: .cfi_offset %rdi, -56
+; CHECK64-SKX-NEXT: .cfi_offset %r8, -48
+; CHECK64-SKX-NEXT: .cfi_offset %r9, -40
+; CHECK64-SKX-NEXT: .cfi_offset %r10, -32
+; CHECK64-SKX-NEXT: .cfi_offset %r11, -24
+; CHECK64-SKX-NEXT: .cfi_offset %rax, -16
+; CHECK64-SKX-NEXT: .cfi_offset %xmm0, -2240
+; CHECK64-SKX-NEXT: .cfi_offset %xmm1, -2176
+; CHECK64-SKX-NEXT: .cfi_offset %xmm2, -2112
+; CHECK64-SKX-NEXT: .cfi_offset %xmm3, -2048
+; CHECK64-SKX-NEXT: .cfi_offset %xmm4, -1984
+; CHECK64-SKX-NEXT: .cfi_offset %xmm5, -1920
+; CHECK64-SKX-NEXT: .cfi_offset %xmm6, -1856
+; CHECK64-SKX-NEXT: .cfi_offset %xmm7, -1792
+; CHECK64-SKX-NEXT: .cfi_offset %xmm8, -1728
+; CHECK64-SKX-NEXT: .cfi_offset %xmm9, -1664
+; CHECK64-SKX-NEXT: .cfi_offset %xmm10, -1600
+; CHECK64-SKX-NEXT: .cfi_offset %xmm11, -1536
+; CHECK64-SKX-NEXT: .cfi_offset %xmm12, -1472
+; CHECK64-SKX-NEXT: .cfi_offset %xmm13, -1408
+; CHECK64-SKX-NEXT: .cfi_offset %xmm14, -1344
+; CHECK64-SKX-NEXT: .cfi_offset %xmm15, -1280
+; CHECK64-SKX-NEXT: .cfi_offset %xmm16, -1216
+; CHECK64-SKX-NEXT: .cfi_offset %xmm17, -1152
+; CHECK64-SKX-NEXT: .cfi_offset %xmm18, -1088
+; CHECK64-SKX-NEXT: .cfi_offset %xmm19, -1024
+; CHECK64-SKX-NEXT: .cfi_offset %xmm20, -960
+; CHECK64-SKX-NEXT: .cfi_offset %xmm21, -896
+; CHECK64-SKX-NEXT: .cfi_offset %xmm22, -832
+; CHECK64-SKX-NEXT: .cfi_offset %xmm23, -768
+; CHECK64-SKX-NEXT: .cfi_offset %xmm24, -704
+; CHECK64-SKX-NEXT: .cfi_offset %xmm25, -640
+; CHECK64-SKX-NEXT: .cfi_offset %xmm26, -576
+; CHECK64-SKX-NEXT: .cfi_offset %xmm27, -512
+; CHECK64-SKX-NEXT: .cfi_offset %xmm28, -448
+; CHECK64-SKX-NEXT: .cfi_offset %xmm29, -384
+; CHECK64-SKX-NEXT: .cfi_offset %xmm30, -320
+; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -224
+; CHECK64-SKX-NEXT: .cfi_offset %k0, -144
+; CHECK64-SKX-NEXT: .cfi_offset %k1, -136
+; CHECK64-SKX-NEXT: .cfi_offset %k2, -128
+; CHECK64-SKX-NEXT: .cfi_offset %k3, -120
+; CHECK64-SKX-NEXT: .cfi_offset %k4, -112
+; CHECK64-SKX-NEXT: .cfi_offset %k5, -104
+; CHECK64-SKX-NEXT: .cfi_offset %k6, -96
+; CHECK64-SKX-NEXT: .cfi_offset %k7, -88
+; CHECK64-SKX-NEXT: cld ## encoding: [0xfc]
+; CHECK64-SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK64-SKX-NEXT: callq _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK64-SKX-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK64-SKX-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm1 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm2 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm3 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm4 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm5 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm6 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm7 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm8 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm9 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm10 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm11 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm12 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm13 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm14 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm15 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm16 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
+; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT: ## imm = 0x870
+; CHECK64-SKX-NEXT: popq %rcx ## encoding: [0x59]
+; CHECK64-SKX-NEXT: popq %rdx ## encoding: [0x5a]
+; CHECK64-SKX-NEXT: popq %rsi ## encoding: [0x5e]
+; CHECK64-SKX-NEXT: popq %rdi ## encoding: [0x5f]
+; CHECK64-SKX-NEXT: popq %r8 ## encoding: [0x41,0x58]
+; CHECK64-SKX-NEXT: popq %r9 ## encoding: [0x41,0x59]
+; CHECK64-SKX-NEXT: popq %r10 ## encoding: [0x41,0x5a]
+; CHECK64-SKX-NEXT: popq %r11 ## encoding: [0x41,0x5b]
+; CHECK64-SKX-NEXT: popq %rax ## encoding: [0x58]
+; CHECK64-SKX-NEXT: iretq ## encoding: [0x48,0xcf]
+;
+; CHECK32-KNL-LABEL: foo:
+; CHECK32-KNL: ## %bb.0:
+; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-KNL-NEXT: pushl %ecx ## encoding: [0x51]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-KNL-NEXT: pushl %eax ## encoding: [0x50]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-KNL-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## imm = 0x270
+; CHECK32-KNL-NEXT: kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-KNL-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT: vmovups %zmm4, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT: vmovups %zmm3, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT: vmovups %zmm2, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK32-KNL-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 640
+; CHECK32-KNL-NEXT: .cfi_offset %eax, -16
+; CHECK32-KNL-NEXT: .cfi_offset %ecx, -12
+; CHECK32-KNL-NEXT: .cfi_offset %edx, -8
+; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -640
+; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -576
+; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -512
+; CHECK32-KNL-NEXT: .cfi_offset %xmm3, -448
+; CHECK32-KNL-NEXT: .cfi_offset %xmm4, -384
+; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -320
+; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -256
+; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -160
+; CHECK32-KNL-NEXT: .cfi_offset %k0, -80
+; CHECK32-KNL-NEXT: .cfi_offset %k1, -72
+; CHECK32-KNL-NEXT: .cfi_offset %k2, -64
+; CHECK32-KNL-NEXT: .cfi_offset %k3, -56
+; CHECK32-KNL-NEXT: .cfi_offset %k4, -48
+; CHECK32-KNL-NEXT: .cfi_offset %k5, -40
+; CHECK32-KNL-NEXT: .cfi_offset %k6, -32
+; CHECK32-KNL-NEXT: .cfi_offset %k7, -24
+; CHECK32-KNL-NEXT: cld ## encoding: [0xfc]
+; CHECK32-KNL-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK32-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK32-KNL-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm1 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm2 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm3 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm4 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm5 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## imm = 0x270
+; CHECK32-KNL-NEXT: popl %eax ## encoding: [0x58]
+; CHECK32-KNL-NEXT: popl %ecx ## encoding: [0x59]
+; CHECK32-KNL-NEXT: popl %edx ## encoding: [0x5a]
+; CHECK32-KNL-NEXT: iretl ## encoding: [0xcf]
+;
+; CHECK32-SKX-LABEL: foo:
+; CHECK32-SKX: ## %bb.0:
+; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52]
+; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-SKX-NEXT: pushl %ecx ## encoding: [0x51]
+; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-SKX-NEXT: pushl %eax ## encoding: [0x50]
+; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-SKX-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## imm = 0x270
+; CHECK32-SKX-NEXT: kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT: vmovups %zmm4, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT: vmovups %zmm3, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT: vmovups %zmm2, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK32-SKX-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 640
+; CHECK32-SKX-NEXT: .cfi_offset %eax, -16
+; CHECK32-SKX-NEXT: .cfi_offset %ecx, -12
+; CHECK32-SKX-NEXT: .cfi_offset %edx, -8
+; CHECK32-SKX-NEXT: .cfi_offset %xmm0, -640
+; CHECK32-SKX-NEXT: .cfi_offset %xmm1, -576
+; CHECK32-SKX-NEXT: .cfi_offset %xmm2, -512
+; CHECK32-SKX-NEXT: .cfi_offset %xmm3, -448
+; CHECK32-SKX-NEXT: .cfi_offset %xmm4, -384
+; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
+; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
+; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -160
+; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
+; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
+; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
+; CHECK32-SKX-NEXT: .cfi_offset %k3, -56
+; CHECK32-SKX-NEXT: .cfi_offset %k4, -48
+; CHECK32-SKX-NEXT: .cfi_offset %k5, -40
+; CHECK32-SKX-NEXT: .cfi_offset %k6, -32
+; CHECK32-SKX-NEXT: .cfi_offset %k7, -24
+; CHECK32-SKX-NEXT: cld ## encoding: [0xfc]
+; CHECK32-SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK32-SKX-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK32-SKX-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK32-SKX-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm1 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm2 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm3 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm4 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm5 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## imm = 0x270
+; CHECK32-SKX-NEXT: popl %eax ## encoding: [0x58]
+; CHECK32-SKX-NEXT: popl %ecx ## encoding: [0x59]
+; CHECK32-SKX-NEXT: popl %edx ## encoding: [0x5a]
+; CHECK32-SKX-NEXT: iretl ## encoding: [0xcf]
call void @bar()
ret void
}
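
For anyone reproducing this: CHECK blocks of this shape can be regenerated by
rerunning the update script named in the test's NOTE line. A minimal sketch,
assuming a freshly built llc is on PATH and the command is run from the source
tree root (exact flags may differ between script revisions):

  python llvm/utils/update_llc_test_checks.py \
      llvm/test/CodeGen/X86/x86-interrupt_cc.ll

The script reruns each RUN line and rewrites the per-function CHECK blocks in
place. One detail visible above: the SKX output emits a vzeroupper before the
calll that the KNL output does not, so the script emits full per-CPU blocks
under the -KNL/-SKX prefixes rather than a single shared CHECK32 body.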