[llvm] 5cd690a - Generate sse-intel-ocl.ll automatically. NFC
Amaury Séchet via llvm-commits
llvm-commits at lists.llvm.org
Sat May 7 15:52:18 PDT 2022
Author: Amaury Séchet
Date: 2022-05-07T22:46:39Z
New Revision: 5cd690ad9c6af143502f52b003b3608f693d2938
URL: https://github.com/llvm/llvm-project/commit/5cd690ad9c6af143502f52b003b3608f693d2938
DIFF: https://github.com/llvm/llvm-project/commit/5cd690ad9c6af143502f52b003b3608f693d2938.diff
LOG: Generate sse-intel-ocl.ll automatically. NFC
Added:
Modified:
llvm/test/CodeGen/X86/sse-intel-ocl.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/sse-intel-ocl.ll b/llvm/test/CodeGen/X86/sse-intel-ocl.ll
index 0e37ad4a7918..4ccb88b6cdb7 100644
--- a/llvm/test/CodeGen/X86/sse-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/sse-intel-ocl.ll
@@ -1,68 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
-; WIN64: testf16_inp
-; WIN64: addps {{.*}}, {{%xmm[0-3]}}
-; WIN64: addps {{.*}}, {{%xmm[0-3]}}
-; WIN64: addps {{.*}}, {{%xmm[0-3]}}
-; WIN64: addps {{.*}}, {{%xmm[0-3]}}
-; WIN64: leaq {{.*}}(%rsp), %rcx
-; WIN64: call
-; WIN64: ret
-
-; WIN32: testf16_inp
-; WIN32: pushl %eax
-; WIN32: addps {{.*}}, {{%xmm[0-3]}}
-; WIN32: addps {{.*}}, {{%xmm[0-3]}}
-; WIN32: addps {{.*}}, {{%xmm[0-3]}}
-; WIN32: addps {{.*}}, {{%xmm[0-3]}}
-; WIN32: call
-; WIN32: ret
-
-; NOT_WIN: testf16_inp
-; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
-; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
-; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
-; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
-; NOT_WIN: movq %rsp, %rdi
-; NOT_WIN: call
-; NOT_WIN: ret
;test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+; WIN32-LABEL: testf16_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $80, %esp
+; WIN32-NEXT: movups 72(%ebp), %xmm4
+; WIN32-NEXT: movups 8(%ebp), %xmm3
+; WIN32-NEXT: addps %xmm4, %xmm3
+; WIN32-NEXT: movups 56(%ebp), %xmm4
+; WIN32-NEXT: movups 40(%ebp), %xmm5
+; WIN32-NEXT: movups 24(%ebp), %xmm6
+; WIN32-NEXT: movl %esp, %eax
+; WIN32-NEXT: addps %xmm6, %xmm0
+; WIN32-NEXT: addps %xmm5, %xmm1
+; WIN32-NEXT: addps %xmm4, %xmm2
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: calll _func_float16_ptr
+; WIN32-NEXT: addl $4, %esp
+; WIN32-NEXT: addps (%esp), %xmm0
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm1
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm2
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm3
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf16_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $104, %rsp
+; WIN64-NEXT: movaps (%r9), %xmm3
+; WIN64-NEXT: movaps (%r8), %xmm2
+; WIN64-NEXT: movaps (%rdx), %xmm1
+; WIN64-NEXT: movaps (%rcx), %xmm0
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: addps (%rax), %xmm0
+; WIN64-NEXT: addps (%rdx), %xmm1
+; WIN64-NEXT: addps (%rcx), %xmm2
+; WIN64-NEXT: addps (%r8), %xmm3
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: callq func_float16_ptr
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; WIN64-NEXT: addq $104, %rsp
+; WIN64-NEXT: retq
+;
+; NOT_WIN-LABEL: testf16_inp:
+; NOT_WIN: ## %bb.0:
+; NOT_WIN-NEXT: subq $72, %rsp
+; NOT_WIN-NEXT: addps %xmm4, %xmm0
+; NOT_WIN-NEXT: addps %xmm5, %xmm1
+; NOT_WIN-NEXT: addps %xmm6, %xmm2
+; NOT_WIN-NEXT: addps %xmm7, %xmm3
+; NOT_WIN-NEXT: movq %rsp, %rdi
+; NOT_WIN-NEXT: callq _func_float16_ptr
+; NOT_WIN-NEXT: addps (%rsp), %xmm0
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; NOT_WIN-NEXT: addq $72, %rsp
+; NOT_WIN-NEXT: retq
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
- %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %2, %1
ret <16 x float> %3
}
-;test calling conventions - preserved registers
-
-; preserved xmm6-xmm15
-; WIN64: testf16_regs
-; WIN64: call
-; WIN64: addps {{%xmm[6-9]}}, {{.*}}
-; WIN64: addps {{%xmm[6-9]}}, {{.*}}
-; WIN64: ret
-
-; preserved xmm8-xmm15
-; NOT_WIN: testf16_regs
-; NOT_WIN: call
-; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
-; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
-; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
-; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
-; NOT_WIN: ret
+; test calling conventions - preserved registers
+; preserves xmm6-xmm15 on windows, xmm8-xmm15 on other platforms.
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+; WIN32-LABEL: testf16_regs:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $80, %esp
+; WIN32-NEXT: movups 72(%ebp), %xmm6
+; WIN32-NEXT: movups 8(%ebp), %xmm3
+; WIN32-NEXT: movups 56(%ebp), %xmm7
+; WIN32-NEXT: movups 40(%ebp), %xmm5
+; WIN32-NEXT: movups 24(%ebp), %xmm4
+; WIN32-NEXT: movl %esp, %eax
+; WIN32-NEXT: addps %xmm4, %xmm0
+; WIN32-NEXT: addps %xmm5, %xmm1
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: addps %xmm6, %xmm3
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: calll _func_float16_ptr
+; WIN32-NEXT: addl $4, %esp
+; WIN32-NEXT: movups 72(%ebp), %xmm4
+; WIN32-NEXT: addps %xmm4, %xmm3
+; WIN32-NEXT: movups 56(%ebp), %xmm4
+; WIN32-NEXT: addps %xmm4, %xmm2
+; WIN32-NEXT: movups 40(%ebp), %xmm4
+; WIN32-NEXT: addps %xmm4, %xmm1
+; WIN32-NEXT: movups 24(%ebp), %xmm4
+; WIN32-NEXT: addps %xmm4, %xmm0
+; WIN32-NEXT: addps (%esp), %xmm0
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm1
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm2
+; WIN32-NEXT: addps {{[0-9]+}}(%esp), %xmm3
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf16_regs:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $168, %rsp
+; WIN64-NEXT: movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movaps (%rax), %xmm6
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movaps (%rax), %xmm7
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movaps (%rax), %xmm8
+; WIN64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movaps (%rax), %xmm9
+; WIN64-NEXT: movaps (%rcx), %xmm0
+; WIN64-NEXT: addps %xmm9, %xmm0
+; WIN64-NEXT: movaps (%rdx), %xmm1
+; WIN64-NEXT: addps %xmm8, %xmm1
+; WIN64-NEXT: movaps (%r8), %xmm2
+; WIN64-NEXT: addps %xmm7, %xmm2
+; WIN64-NEXT: movaps (%r9), %xmm3
+; WIN64-NEXT: addps %xmm6, %xmm3
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: callq func_float16_ptr
+; WIN64-NEXT: addps %xmm6, %xmm3
+; WIN64-NEXT: addps %xmm7, %xmm2
+; WIN64-NEXT: addps %xmm8, %xmm1
+; WIN64-NEXT: addps %xmm9, %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
+; WIN64-NEXT: addq $168, %rsp
+; WIN64-NEXT: retq
+;
+; NOT_WIN-LABEL: testf16_regs:
+; NOT_WIN: ## %bb.0:
+; NOT_WIN-NEXT: subq $72, %rsp
+; NOT_WIN-NEXT: movaps %xmm7, %xmm9
+; NOT_WIN-NEXT: movaps %xmm6, %xmm10
+; NOT_WIN-NEXT: movaps %xmm5, %xmm11
+; NOT_WIN-NEXT: movaps %xmm4, %xmm8
+; NOT_WIN-NEXT: addps %xmm4, %xmm0
+; NOT_WIN-NEXT: addps %xmm5, %xmm1
+; NOT_WIN-NEXT: addps %xmm6, %xmm2
+; NOT_WIN-NEXT: addps %xmm7, %xmm3
+; NOT_WIN-NEXT: movq %rsp, %rdi
+; NOT_WIN-NEXT: callq _func_float16_ptr
+; NOT_WIN-NEXT: addps %xmm9, %xmm3
+; NOT_WIN-NEXT: addps %xmm10, %xmm2
+; NOT_WIN-NEXT: addps %xmm11, %xmm1
+; NOT_WIN-NEXT: addps %xmm8, %xmm0
+; NOT_WIN-NEXT: addps (%rsp), %xmm0
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; NOT_WIN-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; NOT_WIN-NEXT: addq $72, %rsp
+; NOT_WIN-NEXT: retq
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
- %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %1, %b
%4 = fadd <16 x float> %2, %3
@@ -70,24 +194,84 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
}
; test calling conventions - prolog and epilog
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
-; NOT_WIN: call
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
-; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+; WIN32-LABEL: test_prolog_epilog:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $96, %esp
+; WIN32-NEXT: movups 8(%ebp), %xmm4
+; WIN32-NEXT: movups 24(%ebp), %xmm5
+; WIN32-NEXT: movups 40(%ebp), %xmm6
+; WIN32-NEXT: movups 56(%ebp), %xmm7
+; WIN32-NEXT: movups %xmm7, {{[0-9]+}}(%esp)
+; WIN32-NEXT: movups %xmm6, {{[0-9]+}}(%esp)
+; WIN32-NEXT: movups %xmm5, {{[0-9]+}}(%esp)
+; WIN32-NEXT: movups %xmm4, {{[0-9]+}}(%esp)
+; WIN32-NEXT: movups %xmm3, (%esp)
+; WIN32-NEXT: calll _func_float16
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_prolog_epilog:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $232, %rsp
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps (%r9), %xmm4
+; WIN64-NEXT: movaps (%rdx), %xmm5
+; WIN64-NEXT: movaps (%r8), %xmm6
+; WIN64-NEXT: movaps (%rcx), %xmm7
+; WIN64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r9
+; WIN64-NEXT: callq func_float16
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: addq $232, %rsp
+; WIN64-NEXT: retq
+;
+; NOT_WIN-LABEL: test_prolog_epilog:
+; NOT_WIN: ## %bb.0:
+; NOT_WIN-NEXT: subq $136, %rsp
+; NOT_WIN-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; NOT_WIN-NEXT: movaps %xmm8, (%rsp) ## 16-byte Spill
+; NOT_WIN-NEXT: callq _func_float16
+; NOT_WIN-NEXT: movaps (%rsp), %xmm8 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 ## 16-byte Reload
+; NOT_WIN-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 ## 16-byte Reload
+; NOT_WIN-NEXT: addq $136, %rsp
+; NOT_WIN-NEXT: retq
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
}
More information about the llvm-commits
mailing list