[llvm] 324d696 - Automatically generate several X86/sse test cases. NFC
Amaury Séchet via llvm-commits
llvm-commits@lists.llvm.org
Fri May 6 09:14:02 PDT 2022
Author: Amaury Séchet
Date: 2022-05-06T16:08:19Z
New Revision: 324d696c154aad42b2580d2f225a677c04fe0ab4
URL: https://github.com/llvm/llvm-project/commit/324d696c154aad42b2580d2f225a677c04fe0ab4
DIFF: https://github.com/llvm/llvm-project/commit/324d696c154aad42b2580d2f225a677c04fe0ab4.diff
LOG: Automatically generate several X86/sse test cases. NFC
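(Workflow note: the assertions below were regenerated with LLVM's check-update script. A typical invocation, assuming a build tree at build/ (that path is illustrative, not part of this commit), looks like:

  llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
    llvm/test/CodeGen/X86/sse-commute.ll \
    llvm/test/CodeGen/X86/sse-regcall.ll \
    llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll \
    llvm/test/CodeGen/X86/sse_reload_fold.ll

The script reads each test's RUN lines, reruns llc with the same flags, and rewrites the CHECK/CHECK-NEXT blocks in place, so the diff below is mechanical.)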
Added:
Modified:
llvm/test/CodeGen/X86/sse-commute.ll
llvm/test/CodeGen/X86/sse-regcall.ll
llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
llvm/test/CodeGen/X86/sse_reload_fold.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/sse-commute.ll b/llvm/test/CodeGen/X86/sse-commute.ll
index 1800a6eea61b..866b1e268a56 100644
--- a/llvm/test/CodeGen/X86/sse-commute.ll
+++ b/llvm/test/CodeGen/X86/sse-commute.ll
@@ -1,12 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
; Commute the comparison to avoid a move.
; PR7500.
-
-; CHECK-LABEL: a:
-; CHECK-NOT: mov
-; CHECK: pcmpeqd
define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone {
+; CHECK-LABEL: a:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: retq
entry:
%tmp6 = bitcast <2 x double> %0 to <4 x i32> ; <<4 x i32>> [#uses=2]
%tmp4 = bitcast <2 x double> %1 to <4 x i32> ; <<4 x i32>> [#uses=1]
@@ -16,5 +18,3 @@ entry:
%tmp8 = bitcast <4 x i32> %and to <2 x double> ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp8
}
-
-
diff --git a/llvm/test/CodeGen/X86/sse-regcall.ll b/llvm/test/CodeGen/X86/sse-regcall.ll
index e7a4c686f874..09b16bccec40 100644
--- a/llvm/test/CodeGen/X86/sse-regcall.ll
+++ b/llvm/test/CodeGen/X86/sse-regcall.ll
@@ -1,82 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
-; WIN32-LABEL: test_argReti1:
-; WIN32: incb %al
-; WIN32: ret{{.*}}
-
-; WIN64-LABEL: test_argReti1:
-; WIN64: incb %al
-; WIN64: ret{{.*}}
-
; Test regcall when receiving/returning i1
define x86_regcallcc i1 @test_argReti1(i1 %a) {
+; WIN32-LABEL: test_argReti1:
+; WIN32: # %bb.0:
+; WIN32-NEXT: incb %al
+; WIN32-NEXT: # kill: def $al killed $al killed $eax
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_argReti1:
+; WIN64: # %bb.0:
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: test_argReti1:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax
+; LINUXOSX-NEXT: retq
%add = add i1 %a, 1
ret i1 %add
}
-; WIN32-LABEL: test_CallargReti1:
-; WIN32: movzbl %al, %eax
-; WIN32: call{{.*}} {{.*}}test_argReti1
-; WIN32: incb %al
-; WIN32: ret{{.*}}
-
-; WIN64-LABEL: test_CallargReti1:
-; WIN64: movzbl %al, %eax
-; WIN64: call{{.*}} {{.*}}test_argReti1
-; WIN64: incb %al
-; WIN64: ret{{.*}}
-
; Test regcall when passing/retrieving i1
define x86_regcallcc i1 @test_CallargReti1(i1 %a) {
+; WIN32-LABEL: test_CallargReti1:
+; WIN32: # %bb.0:
+; WIN32-NEXT: incb %al
+; WIN32-NEXT: movzbl %al, %eax
+; WIN32-NEXT: calll _test_argReti1
+; WIN32-NEXT: incb %al
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_CallargReti1:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rax
+; WIN64-NEXT: .seh_stackalloc 8
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: movzbl %al, %eax
+; WIN64-NEXT: callq test_argReti1
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: popq %rcx
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
+; LINUXOSX-LABEL: test_CallargReti1:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: pushq %rax
+; LINUXOSX-NEXT: .cfi_def_cfa_offset 16
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: movzbl %al, %eax
+; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip)
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: popq %rcx
+; LINUXOSX-NEXT: .cfi_def_cfa_offset 8
+; LINUXOSX-NEXT: retq
%b = add i1 %a, 1
%c = call x86_regcallcc i1 @test_argReti1(i1 %b)
%d = add i1 %c, 1
ret i1 %d
}
-; WIN64-LABEL: testf32_inp
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: retq
-
-; WIN32-LABEL: testf32_inp
-; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill
-; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload
-; WIN32: retl
-
-; LINUXOSX-LABEL: testf32_inp
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
-; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: retq
-
;test calling conventions - input parameters, callee saved xmms
define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
+; WIN32-LABEL: testf32_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $32, %esp
+; WIN32-NEXT: movaps %xmm7, (%esp) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm6, %xmm7
+; WIN32-NEXT: movaps %xmm5, %xmm6
+; WIN32-NEXT: movaps %xmm3, %xmm5
+; WIN32-NEXT: movaps %xmm2, %xmm3
+; WIN32-NEXT: movaps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm0, %xmm1
+; WIN32-NEXT: addps %xmm4, %xmm0
+; WIN32-NEXT: mulps %xmm4, %xmm1
+; WIN32-NEXT: subps %xmm1, %xmm0
+; WIN32-NEXT: movups 8(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm0
+; WIN32-NEXT: movaps %xmm2, %xmm4
+; WIN32-NEXT: addps %xmm6, %xmm4
+; WIN32-NEXT: mulps %xmm6, %xmm2
+; WIN32-NEXT: subps %xmm2, %xmm4
+; WIN32-NEXT: movups 24(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm4
+; WIN32-NEXT: movaps %xmm3, %xmm2
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: mulps %xmm7, %xmm3
+; WIN32-NEXT: subps %xmm3, %xmm2
+; WIN32-NEXT: movups 40(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm5, %xmm3
+; WIN32-NEXT: movaps (%esp), %xmm1 # 16-byte Reload
+; WIN32-NEXT: addps %xmm1, %xmm3
+; WIN32-NEXT: mulps %xmm1, %xmm5
+; WIN32-NEXT: subps %xmm5, %xmm3
+; WIN32-NEXT: movups 56(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm3
+; WIN32-NEXT: movaps %xmm4, %xmm1
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf32_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm12, (%rsp) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm0, %xmm12
+; WIN64-NEXT: addps %xmm4, %xmm12
+; WIN64-NEXT: movaps %xmm1, %xmm13
+; WIN64-NEXT: addps %xmm5, %xmm13
+; WIN64-NEXT: movaps %xmm2, %xmm14
+; WIN64-NEXT: addps %xmm6, %xmm14
+; WIN64-NEXT: movaps %xmm3, %xmm15
+; WIN64-NEXT: addps %xmm7, %xmm15
+; WIN64-NEXT: mulps %xmm4, %xmm0
+; WIN64-NEXT: subps %xmm0, %xmm12
+; WIN64-NEXT: mulps %xmm5, %xmm1
+; WIN64-NEXT: subps %xmm1, %xmm13
+; WIN64-NEXT: mulps %xmm6, %xmm2
+; WIN64-NEXT: subps %xmm2, %xmm14
+; WIN64-NEXT: mulps %xmm7, %xmm3
+; WIN64-NEXT: subps %xmm3, %xmm15
+; WIN64-NEXT: addps %xmm8, %xmm12
+; WIN64-NEXT: addps %xmm9, %xmm13
+; WIN64-NEXT: addps %xmm10, %xmm14
+; WIN64-NEXT: addps %xmm11, %xmm15
+; WIN64-NEXT: movaps %xmm12, %xmm0
+; WIN64-NEXT: movaps %xmm13, %xmm1
+; WIN64-NEXT: movaps %xmm14, %xmm2
+; WIN64-NEXT: movaps %xmm15, %xmm3
+; WIN64-NEXT: movaps (%rsp), %xmm12 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testf32_inp:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm0, %xmm12
+; LINUXOSX-NEXT: addps %xmm4, %xmm12
+; LINUXOSX-NEXT: movaps %xmm1, %xmm13
+; LINUXOSX-NEXT: addps %xmm5, %xmm13
+; LINUXOSX-NEXT: movaps %xmm2, %xmm14
+; LINUXOSX-NEXT: addps %xmm6, %xmm14
+; LINUXOSX-NEXT: movaps %xmm3, %xmm15
+; LINUXOSX-NEXT: addps %xmm7, %xmm15
+; LINUXOSX-NEXT: mulps %xmm4, %xmm0
+; LINUXOSX-NEXT: subps %xmm0, %xmm12
+; LINUXOSX-NEXT: mulps %xmm5, %xmm1
+; LINUXOSX-NEXT: subps %xmm1, %xmm13
+; LINUXOSX-NEXT: mulps %xmm6, %xmm2
+; LINUXOSX-NEXT: subps %xmm2, %xmm14
+; LINUXOSX-NEXT: mulps %xmm7, %xmm3
+; LINUXOSX-NEXT: subps %xmm3, %xmm15
+; LINUXOSX-NEXT: addps %xmm8, %xmm12
+; LINUXOSX-NEXT: addps %xmm9, %xmm13
+; LINUXOSX-NEXT: addps %xmm10, %xmm14
+; LINUXOSX-NEXT: addps %xmm11, %xmm15
+; LINUXOSX-NEXT: movaps %xmm12, %xmm0
+; LINUXOSX-NEXT: movaps %xmm13, %xmm1
+; LINUXOSX-NEXT: movaps %xmm14, %xmm2
+; LINUXOSX-NEXT: movaps %xmm15, %xmm3
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; LINUXOSX-NEXT: retq
%x1 = fadd <16 x float> %a, %b
%x2 = fmul <16 x float> %a, %b
%x3 = fsub <16 x float> %x1, %x2
@@ -84,31 +190,153 @@ define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b,
ret <16 x float> %x4
}
-; WIN32-LABEL: testi32_inp
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
-; WIN32: retl
-
-; WIN64-LABEL: testi32_inp
-; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
-; WIN64: retq
-
-; LINUXOSX-LABEL: testi32_inp
-; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: retq
-
;test calling conventions - input parameters, callee saved GPRs
define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
+; WIN32-LABEL: testi32_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: pushl %ebx
+; WIN32-NEXT: subl $12, %esp
+; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: leal (%edx,%esi), %eax
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: subl %esi, %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: subl %ecx, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: imull %eax, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %ebx, %eax
+; WIN32-NEXT: addl %ecx, %eax
+; WIN32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: imull %ebx, %ecx
+; WIN32-NEXT: addl %eax, %ecx
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; WIN32-NEXT: addl %esi, %edi
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: imull %eax, %edx
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: addl %ecx, %edi
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: addl $12, %esp
+; WIN32-NEXT: popl %ebx
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testi32_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %r13
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: pushq %rbx
+; WIN64-NEXT: # kill: def $edx killed $edx def $rdx
+; WIN64-NEXT: movl %ecx, %ebx
+; WIN64-NEXT: # kill: def $esi killed $esi def $rsi
+; WIN64-NEXT: # kill: def $r15d killed $r15d def $r15
+; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14
+; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12
+; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11
+; WIN64-NEXT: # kill: def $r10d killed $r10d def $r10
+; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9
+; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8
+; WIN64-NEXT: # kill: def $edi killed $edi def $rdi
+; WIN64-NEXT: leal (%rdx,%rdi), %r13d
+; WIN64-NEXT: # kill: def $edx killed $edx killed $rdx
+; WIN64-NEXT: subl %edi, %edx
+; WIN64-NEXT: leal (%rsi,%r8), %ecx
+; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi
+; WIN64-NEXT: subl %r8d, %esi
+; WIN64-NEXT: leal (%r9,%r10), %r8d
+; WIN64-NEXT: movl %r9d, %ebp
+; WIN64-NEXT: subl %r10d, %ebp
+; WIN64-NEXT: movl %eax, %edi
+; WIN64-NEXT: movl %ebx, %r9d
+; WIN64-NEXT: subl %ebx, %edi
+; WIN64-NEXT: imull %edi, %ebp
+; WIN64-NEXT: leal (%r11,%r12), %edi
+; WIN64-NEXT: movl %r11d, %ebx
+; WIN64-NEXT: subl %r12d, %ebx
+; WIN64-NEXT: imull %edx, %ebx
+; WIN64-NEXT: addl %ebp, %ebx
+; WIN64-NEXT: leal (%r14,%r15), %edx
+; WIN64-NEXT: movl %r14d, %ebp
+; WIN64-NEXT: subl %r15d, %ebp
+; WIN64-NEXT: imull %esi, %ebp
+; WIN64-NEXT: addl %ebx, %ebp
+; WIN64-NEXT: addl %r9d, %eax
+; WIN64-NEXT: imull %r8d, %eax
+; WIN64-NEXT: imull %r13d, %edi
+; WIN64-NEXT: addl %edi, %eax
+; WIN64-NEXT: imull %ecx, %edx
+; WIN64-NEXT: addl %edx, %eax
+; WIN64-NEXT: addl %ebp, %eax
+; WIN64-NEXT: popq %rbx
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: popq %r13
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testi32_inp:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: pushq %rbp
+; LINUXOSX-NEXT: pushq %rbx
+; LINUXOSX-NEXT: # kill: def $edx killed $edx def $rdx
+; LINUXOSX-NEXT: # kill: def $esi killed $esi def $rsi
+; LINUXOSX-NEXT: # kill: def $r14d killed $r14d def $r14
+; LINUXOSX-NEXT: # kill: def $r13d killed $r13d def $r13
+; LINUXOSX-NEXT: # kill: def $r12d killed $r12d def $r12
+; LINUXOSX-NEXT: # kill: def $r9d killed $r9d def $r9
+; LINUXOSX-NEXT: # kill: def $r8d killed $r8d def $r8
+; LINUXOSX-NEXT: # kill: def $edi killed $edi def $rdi
+; LINUXOSX-NEXT: leal (%rdx,%rdi), %r10d
+; LINUXOSX-NEXT: movl %edx, %ebp
+; LINUXOSX-NEXT: subl %edi, %ebp
+; LINUXOSX-NEXT: leal (%rsi,%r8), %r11d
+; LINUXOSX-NEXT: # kill: def $esi killed $esi killed $rsi
+; LINUXOSX-NEXT: subl %r8d, %esi
+; LINUXOSX-NEXT: leal (%r9,%r12), %r8d
+; LINUXOSX-NEXT: movl %r9d, %edi
+; LINUXOSX-NEXT: subl %r12d, %edi
+; LINUXOSX-NEXT: movl %eax, %edx
+; LINUXOSX-NEXT: subl %ecx, %edx
+; LINUXOSX-NEXT: imull %edx, %edi
+; LINUXOSX-NEXT: leal (%r13,%r14), %edx
+; LINUXOSX-NEXT: movl %r13d, %ebx
+; LINUXOSX-NEXT: subl %r14d, %ebx
+; LINUXOSX-NEXT: imull %ebp, %ebx
+; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %ebp
+; LINUXOSX-NEXT: addl %edi, %ebx
+; LINUXOSX-NEXT: movl %r15d, %edi
+; LINUXOSX-NEXT: subl %ebp, %edi
+; LINUXOSX-NEXT: imull %esi, %edi
+; LINUXOSX-NEXT: addl %ebx, %edi
+; LINUXOSX-NEXT: addl %ecx, %eax
+; LINUXOSX-NEXT: imull %r8d, %eax
+; LINUXOSX-NEXT: imull %r10d, %edx
+; LINUXOSX-NEXT: addl %edx, %eax
+; LINUXOSX-NEXT: addl %r15d, %ebp
+; LINUXOSX-NEXT: imull %r11d, %ebp
+; LINUXOSX-NEXT: addl %ebp, %eax
+; LINUXOSX-NEXT: addl %edi, %eax
+; LINUXOSX-NEXT: popq %rbx
+; LINUXOSX-NEXT: popq %rbp
+; LINUXOSX-NEXT: retq
i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
%x1 = sub i32 %a1, %a2
%x2 = sub i32 %a3, %a4
@@ -136,53 +364,111 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
ret i32 %r1
}
-; X32: testf32_stack
-; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
-; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
-; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
-; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
-; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
-
-; LINUXOSX: testf32_stack
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
-; LINUXOSX: retq
-
; Test that parameters, overflowing register capacity, are passed through the stack
define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
+; WIN32-LABEL: testf32_stack:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $48, %esp
+; WIN32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm6, (%esp) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm5, %xmm6
+; WIN32-NEXT: movaps %xmm4, %xmm5
+; WIN32-NEXT: movaps %xmm3, %xmm4
+; WIN32-NEXT: movaps %xmm2, %xmm3
+; WIN32-NEXT: movaps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm0, %xmm1
+; WIN32-NEXT: movups 120(%ebp), %xmm7
+; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movups 248(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; WIN32-NEXT: movups 104(%ebp), %xmm7
+; WIN32-NEXT: movaps (%esp), %xmm0 # 16-byte Reload
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movups 232(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill
+; WIN32-NEXT: movups 88(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm6
+; WIN32-NEXT: movups 216(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm6
+; WIN32-NEXT: movups 72(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm5
+; WIN32-NEXT: movups 200(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm5
+; WIN32-NEXT: movups 56(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm4
+; WIN32-NEXT: movups 184(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm4
+; WIN32-NEXT: movups 40(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm3
+; WIN32-NEXT: movups 168(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm3
+; WIN32-NEXT: movups 24(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: movups 152(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: movups 8(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm1
+; WIN32-NEXT: movups 136(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm1
+; WIN32-NEXT: movaps %xmm1, %xmm0
+; WIN32-NEXT: movaps %xmm2, %xmm1
+; WIN32-NEXT: movaps %xmm3, %xmm2
+; WIN32-NEXT: movaps %xmm4, %xmm3
+; WIN32-NEXT: movaps %xmm5, %xmm4
+; WIN32-NEXT: movaps %xmm6, %xmm5
+; WIN32-NEXT: movaps (%esp), %xmm6 # 16-byte Reload
+; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf32_stack:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rax
+; WIN64-NEXT: addps %xmm15, %xmm7
+; WIN64-NEXT: addps %xmm14, %xmm6
+; WIN64-NEXT: addps %xmm13, %xmm5
+; WIN64-NEXT: addps %xmm12, %xmm4
+; WIN64-NEXT: addps %xmm11, %xmm3
+; WIN64-NEXT: addps %xmm10, %xmm2
+; WIN64-NEXT: addps %xmm9, %xmm1
+; WIN64-NEXT: addps %xmm8, %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
+; WIN64-NEXT: popq %rax
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testf32_stack:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: addps %xmm15, %xmm7
+; LINUXOSX-NEXT: addps %xmm14, %xmm6
+; LINUXOSX-NEXT: addps %xmm13, %xmm5
+; LINUXOSX-NEXT: addps %xmm12, %xmm4
+; LINUXOSX-NEXT: addps %xmm11, %xmm3
+; LINUXOSX-NEXT: addps %xmm10, %xmm2
+; LINUXOSX-NEXT: addps %xmm9, %xmm1
+; LINUXOSX-NEXT: addps %xmm8, %xmm0
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
+; LINUXOSX-NEXT: retq
%x1 = fadd <32 x float> %a, %b
%x2 = fadd <32 x float> %x1, %c
ret <32 x float> %x2
diff --git a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index a5f62dde81fe..7b4436735a9d 100644
--- a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
+++ b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,13 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "i686-unknown-linux-gnu"
define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
- %A = load <4 x float>, <4 x float>* %P, align 4
- %B = fadd <4 x float> %A, %In
- ret <4 x float> %B
-
-; CHECK-LABEL: @foo
-; CHECK: addps (%eax), %xmm0
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: addps (%eax), %xmm0
+; CHECK-NEXT: retl
+ %A = load <4 x float>, <4 x float>* %P, align 4
+ %B = fadd <4 x float> %A, %In
+ ret <4 x float> %B
}
diff --git a/llvm/test/CodeGen/X86/sse_reload_fold.ll b/llvm/test/CodeGen/X86/sse_reload_fold.ll
index fd8db3be1063..f159b643bbdf 100644
--- a/llvm/test/CodeGen/X86/sse_reload_fold.ll
+++ b/llvm/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,6 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
-; CHECK: fail
-; CHECK-NOT: fail
declare float @test_f(float %f)
declare double @test_d(double %f)
@@ -27,91 +26,312 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
define float @foo(float %f) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq test_f@PLT
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call float @test_f(float %f)
%t = call float @llvm.sqrt.f32(float %f)
ret float %t
}
+
define double @doo(double %f) {
+; CHECK-LABEL: doo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: callq test_d@PLT
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call double @test_d(double %f)
%t = call double @llvm.sqrt.f64(double %f)
ret double %t
}
+
define <4 x float> @a0(<4 x float> %f) {
+; CHECK-LABEL: a0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: rsqrtps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @a1(<4 x float> %f) {
+; CHECK-LABEL: a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: sqrtps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @a2(<4 x float> %f) {
+; CHECK-LABEL: a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: rcpps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @b3(<4 x float> %f) {
+; CHECK-LABEL: b3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: minps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @b4(<4 x float> %f) {
+; CHECK-LABEL: b4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: maxps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @b5(<4 x float> %f) {
+; CHECK-LABEL: b5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: cmpordps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
ret <4 x float> %t
}
+
define <4 x float> @b6(<4 x float> %f) {
+; CHECK-LABEL: b6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: addsubps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @b7(<4 x float> %f) {
+; CHECK-LABEL: b7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: haddps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
+
define <4 x float> @b8(<4 x float> %f) {
+; CHECK-LABEL: b8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vf@PLT
+; CHECK-NEXT: hsubps (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
+
define <2 x double> @c1(<2 x double> %f) {
+; CHECK-LABEL: c1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: sqrtpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
ret <2 x double> %t
}
+
define <2 x double> @d3(<2 x double> %f) {
+; CHECK-LABEL: d3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: minpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
+
define <2 x double> @d4(<2 x double> %f) {
+; CHECK-LABEL: d4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: maxpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
+
define <2 x double> @d5(<2 x double> %f) {
+; CHECK-LABEL: d5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: cmpordpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
ret <2 x double> %t
}
+
define <2 x double> @d6(<2 x double> %f) {
+; CHECK-LABEL: d6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: addsubpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
+
define <2 x double> @d7(<2 x double> %f) {
+; CHECK-LABEL: d7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: haddpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
+
define <2 x double> @d8(<2 x double> %f) {
+; CHECK-LABEL: d8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: hsubpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
@@ -126,8 +346,21 @@ define <2 x double> @d8(<2 x double> %f) {
; addq $24, %rsp
; ret
; RABasic still tries to fold this one.
-
define <2 x double> @z0(<2 x double> %f) {
+; CHECK-LABEL: z0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq test_vd@PLT
+; CHECK-NEXT: movapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: hsubpd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
ret <2 x double> %t