[llvm] 042bd21 - Revert "Automatically generates several X86/sse tests cases. NFC"
Nico Weber via llvm-commits
llvm-commits@lists.llvm.org
Fri May 6 13:30:06 PDT 2022
Author: Nico Weber
Date: 2022-05-06T16:29:51-04:00
New Revision: 042bd21cf9f42189c37fbaf57c234c3dd82acf1b
URL: https://github.com/llvm/llvm-project/commit/042bd21cf9f42189c37fbaf57c234c3dd82acf1b
DIFF: https://github.com/llvm/llvm-project/commit/042bd21cf9f42189c37fbaf57c234c3dd82acf1b.diff
LOG: Revert "Automatically generates several X86/sse tests cases. NFC"
This reverts commit 324d696c154aad42b2580d2f225a677c04fe0ab4.
Broke check-llvm on Windows, see e.g.
https://lab.llvm.org/buildbot/#/builders/216/builds/4005 and
comment on https://reviews.llvm.org/rG324d696c154aad42b2580d2f225a677c04fe0ab4
Added:
Modified:
llvm/test/CodeGen/X86/sse-commute.ll
llvm/test/CodeGen/X86/sse-regcall.ll
llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
llvm/test/CodeGen/X86/sse_reload_fold.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/sse-commute.ll b/llvm/test/CodeGen/X86/sse-commute.ll
index 866b1e268a560..1800a6eea61b2 100644
--- a/llvm/test/CodeGen/X86/sse-commute.ll
+++ b/llvm/test/CodeGen/X86/sse-commute.ll
@@ -1,14 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
; Commute the comparison to avoid a move.
; PR7500.
-define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone {
+
; CHECK-LABEL: a:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
-; CHECK-NEXT: pand %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NOT: mov
+; CHECK: pcmpeqd
+define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone {
entry:
%tmp6 = bitcast <2 x double> %0 to <4 x i32> ; <<4 x i32>> [#uses=2]
%tmp4 = bitcast <2 x double> %1 to <4 x i32> ; <<4 x i32>> [#uses=1]
@@ -18,3 +16,5 @@ entry:
%tmp8 = bitcast <4 x i32> %and to <2 x double> ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp8
}
+
+
diff --git a/llvm/test/CodeGen/X86/sse-regcall.ll b/llvm/test/CodeGen/X86/sse-regcall.ll
index 09b16bccec40c..e7a4c686f8747 100644
--- a/llvm/test/CodeGen/X86/sse-regcall.ll
+++ b/llvm/test/CodeGen/X86/sse-regcall.ll
@@ -1,188 +1,82 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
+; WIN32-LABEL: test_argReti1:
+; WIN32: incb %al
+; WIN32: ret{{.*}}
+
+; WIN64-LABEL: test_argReti1:
+; WIN64: incb %al
+; WIN64: ret{{.*}}
+
; Test regcall when receiving/returning i1
define x86_regcallcc i1 @test_argReti1(i1 %a) {
-; WIN32-LABEL: test_argReti1:
-; WIN32: # %bb.0:
-; WIN32-NEXT: incb %al
-; WIN32-NEXT: # kill: def $al killed $al killed $eax
-; WIN32-NEXT: retl
-;
-; WIN64-LABEL: test_argReti1:
-; WIN64: # %bb.0:
-; WIN64-NEXT: incb %al
-; WIN64-NEXT: # kill: def $al killed $al killed $eax
-; WIN64-NEXT: retq
-;
-; LINUXOSX-LABEL: test_argReti1:
-; LINUXOSX: # %bb.0:
-; LINUXOSX-NEXT: incb %al
-; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax
-; LINUXOSX-NEXT: retq
%add = add i1 %a, 1
ret i1 %add
}
+; WIN32-LABEL: test_CallargReti1:
+; WIN32: movzbl %al, %eax
+; WIN32: call{{.*}} {{.*}}test_argReti1
+; WIN32: incb %al
+; WIN32: ret{{.*}}
+
+; WIN64-LABEL: test_CallargReti1:
+; WIN64: movzbl %al, %eax
+; WIN64: call{{.*}} {{.*}}test_argReti1
+; WIN64: incb %al
+; WIN64: ret{{.*}}
+
; Test regcall when passing/retrieving i1
define x86_regcallcc i1 @test_CallargReti1(i1 %a) {
-; WIN32-LABEL: test_CallargReti1:
-; WIN32: # %bb.0:
-; WIN32-NEXT: incb %al
-; WIN32-NEXT: movzbl %al, %eax
-; WIN32-NEXT: calll _test_argReti1
-; WIN32-NEXT: incb %al
-; WIN32-NEXT: retl
-;
-; WIN64-LABEL: test_CallargReti1:
-; WIN64: # %bb.0:
-; WIN64-NEXT: pushq %rax
-; WIN64-NEXT: .seh_stackalloc 8
-; WIN64-NEXT: .seh_endprologue
-; WIN64-NEXT: incb %al
-; WIN64-NEXT: movzbl %al, %eax
-; WIN64-NEXT: callq test_argReti1
-; WIN64-NEXT: incb %al
-; WIN64-NEXT: popq %rcx
-; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
-;
-; LINUXOSX-LABEL: test_CallargReti1:
-; LINUXOSX: # %bb.0:
-; LINUXOSX-NEXT: pushq %rax
-; LINUXOSX-NEXT: .cfi_def_cfa_offset 16
-; LINUXOSX-NEXT: incb %al
-; LINUXOSX-NEXT: movzbl %al, %eax
-; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip)
-; LINUXOSX-NEXT: incb %al
-; LINUXOSX-NEXT: popq %rcx
-; LINUXOSX-NEXT: .cfi_def_cfa_offset 8
-; LINUXOSX-NEXT: retq
%b = add i1 %a, 1
%c = call x86_regcallcc i1 @test_argReti1(i1 %b)
%d = add i1 %c, 1
ret i1 %d
}
+; WIN64-LABEL: testf32_inp
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: retq
+
+; WIN32-LABEL: testf32_inp
+; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload
+; WIN32: retl
+
+; LINUXOSX-LABEL: testf32_inp
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: retq
+
;test calling conventions - input parameters, callee saved xmms
define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
-; WIN32-LABEL: testf32_inp:
-; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %ebp
-; WIN32-NEXT: movl %esp, %ebp
-; WIN32-NEXT: andl $-16, %esp
-; WIN32-NEXT: subl $32, %esp
-; WIN32-NEXT: movaps %xmm7, (%esp) # 16-byte Spill
-; WIN32-NEXT: movaps %xmm6, %xmm7
-; WIN32-NEXT: movaps %xmm5, %xmm6
-; WIN32-NEXT: movaps %xmm3, %xmm5
-; WIN32-NEXT: movaps %xmm2, %xmm3
-; WIN32-NEXT: movaps %xmm1, %xmm2
-; WIN32-NEXT: movaps %xmm0, %xmm1
-; WIN32-NEXT: addps %xmm4, %xmm0
-; WIN32-NEXT: mulps %xmm4, %xmm1
-; WIN32-NEXT: subps %xmm1, %xmm0
-; WIN32-NEXT: movups 8(%ebp), %xmm1
-; WIN32-NEXT: addps %xmm1, %xmm0
-; WIN32-NEXT: movaps %xmm2, %xmm4
-; WIN32-NEXT: addps %xmm6, %xmm4
-; WIN32-NEXT: mulps %xmm6, %xmm2
-; WIN32-NEXT: subps %xmm2, %xmm4
-; WIN32-NEXT: movups 24(%ebp), %xmm1
-; WIN32-NEXT: addps %xmm1, %xmm4
-; WIN32-NEXT: movaps %xmm3, %xmm2
-; WIN32-NEXT: addps %xmm7, %xmm2
-; WIN32-NEXT: mulps %xmm7, %xmm3
-; WIN32-NEXT: subps %xmm3, %xmm2
-; WIN32-NEXT: movups 40(%ebp), %xmm1
-; WIN32-NEXT: addps %xmm1, %xmm2
-; WIN32-NEXT: movaps %xmm5, %xmm3
-; WIN32-NEXT: movaps (%esp), %xmm1 # 16-byte Reload
-; WIN32-NEXT: addps %xmm1, %xmm3
-; WIN32-NEXT: mulps %xmm1, %xmm5
-; WIN32-NEXT: subps %xmm5, %xmm3
-; WIN32-NEXT: movups 56(%ebp), %xmm1
-; WIN32-NEXT: addps %xmm1, %xmm3
-; WIN32-NEXT: movaps %xmm4, %xmm1
-; WIN32-NEXT: movl %ebp, %esp
-; WIN32-NEXT: popl %ebp
-; WIN32-NEXT: retl
-;
-; WIN64-LABEL: testf32_inp:
-; WIN64: # %bb.0:
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: movaps %xmm12, (%rsp) # 16-byte Spill
-; WIN64-NEXT: movaps %xmm0, %xmm12
-; WIN64-NEXT: addps %xmm4, %xmm12
-; WIN64-NEXT: movaps %xmm1, %xmm13
-; WIN64-NEXT: addps %xmm5, %xmm13
-; WIN64-NEXT: movaps %xmm2, %xmm14
-; WIN64-NEXT: addps %xmm6, %xmm14
-; WIN64-NEXT: movaps %xmm3, %xmm15
-; WIN64-NEXT: addps %xmm7, %xmm15
-; WIN64-NEXT: mulps %xmm4, %xmm0
-; WIN64-NEXT: subps %xmm0, %xmm12
-; WIN64-NEXT: mulps %xmm5, %xmm1
-; WIN64-NEXT: subps %xmm1, %xmm13
-; WIN64-NEXT: mulps %xmm6, %xmm2
-; WIN64-NEXT: subps %xmm2, %xmm14
-; WIN64-NEXT: mulps %xmm7, %xmm3
-; WIN64-NEXT: subps %xmm3, %xmm15
-; WIN64-NEXT: addps %xmm8, %xmm12
-; WIN64-NEXT: addps %xmm9, %xmm13
-; WIN64-NEXT: addps %xmm10, %xmm14
-; WIN64-NEXT: addps %xmm11, %xmm15
-; WIN64-NEXT: movaps %xmm12, %xmm0
-; WIN64-NEXT: movaps %xmm13, %xmm1
-; WIN64-NEXT: movaps %xmm14, %xmm2
-; WIN64-NEXT: movaps %xmm15, %xmm3
-; WIN64-NEXT: movaps (%rsp), %xmm12 # 16-byte Reload
-; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
-; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
-; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
-; WIN64-NEXT: addq $72, %rsp
-; WIN64-NEXT: retq
-;
-; LINUXOSX-LABEL: testf32_inp:
-; LINUXOSX: # %bb.0:
-; LINUXOSX-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUXOSX-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUXOSX-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUXOSX-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUXOSX-NEXT: movaps %xmm0, %xmm12
-; LINUXOSX-NEXT: addps %xmm4, %xmm12
-; LINUXOSX-NEXT: movaps %xmm1, %xmm13
-; LINUXOSX-NEXT: addps %xmm5, %xmm13
-; LINUXOSX-NEXT: movaps %xmm2, %xmm14
-; LINUXOSX-NEXT: addps %xmm6, %xmm14
-; LINUXOSX-NEXT: movaps %xmm3, %xmm15
-; LINUXOSX-NEXT: addps %xmm7, %xmm15
-; LINUXOSX-NEXT: mulps %xmm4, %xmm0
-; LINUXOSX-NEXT: subps %xmm0, %xmm12
-; LINUXOSX-NEXT: mulps %xmm5, %xmm1
-; LINUXOSX-NEXT: subps %xmm1, %xmm13
-; LINUXOSX-NEXT: mulps %xmm6, %xmm2
-; LINUXOSX-NEXT: subps %xmm2, %xmm14
-; LINUXOSX-NEXT: mulps %xmm7, %xmm3
-; LINUXOSX-NEXT: subps %xmm3, %xmm15
-; LINUXOSX-NEXT: addps %xmm8, %xmm12
-; LINUXOSX-NEXT: addps %xmm9, %xmm13
-; LINUXOSX-NEXT: addps %xmm10, %xmm14
-; LINUXOSX-NEXT: addps %xmm11, %xmm15
-; LINUXOSX-NEXT: movaps %xmm12, %xmm0
-; LINUXOSX-NEXT: movaps %xmm13, %xmm1
-; LINUXOSX-NEXT: movaps %xmm14, %xmm2
-; LINUXOSX-NEXT: movaps %xmm15, %xmm3
-; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
-; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
-; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
-; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
-; LINUXOSX-NEXT: retq
%x1 = fadd <16 x float> %a, %b
%x2 = fmul <16 x float> %a, %b
%x3 = fsub <16 x float> %x1, %x2
@@ -190,153 +84,31 @@ define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b,
ret <16 x float> %x4
}
+; WIN32-LABEL: testi32_inp
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: retl
+
+; WIN64-LABEL: testi32_inp
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}
+; WIN64: pushq {{%r(bp|bx|1[0-5])}}
+; WIN64: popq {{%r(bp|bx|1[0-5])}}
+; WIN64: popq {{%r(bp|bx|1[0-5])}}
+; WIN64: popq {{%r(bp|bx|1[0-5])}}
+; WIN64: retq
+
+; LINUXOSX-LABEL: testi32_inp
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX: retq
+
;test calling conventions - input parameters, callee saved GPRs
define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
-; WIN32-LABEL: testi32_inp:
-; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %ebp
-; WIN32-NEXT: pushl %ebx
-; WIN32-NEXT: subl $12, %esp
-; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; WIN32-NEXT: movl %edi, %esi
-; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl %eax, %edi
-; WIN32-NEXT: leal (%edx,%esi), %eax
-; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: subl %esi, %ebx
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: subl %ecx, %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: movl %ebp, %ecx
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: imull %eax, %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull %ebx, %eax
-; WIN32-NEXT: addl %ecx, %eax
-; WIN32-NEXT: movl (%esp), %ebx # 4-byte Reload
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: imull %ebx, %ecx
-; WIN32-NEXT: addl %eax, %ecx
-; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
-; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: imull %ebp, %edi
-; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; WIN32-NEXT: addl %esi, %edi
-; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: imull %eax, %edx
-; WIN32-NEXT: addl %edx, %edi
-; WIN32-NEXT: addl %ecx, %edi
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: addl $12, %esp
-; WIN32-NEXT: popl %ebx
-; WIN32-NEXT: popl %ebp
-; WIN32-NEXT: retl
-;
-; WIN64-LABEL: testi32_inp:
-; WIN64: # %bb.0:
-; WIN64-NEXT: pushq %r13
-; WIN64-NEXT: pushq %rbp
-; WIN64-NEXT: pushq %rbx
-; WIN64-NEXT: # kill: def $edx killed $edx def $rdx
-; WIN64-NEXT: movl %ecx, %ebx
-; WIN64-NEXT: # kill: def $esi killed $esi def $rsi
-; WIN64-NEXT: # kill: def $r15d killed $r15d def $r15
-; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14
-; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12
-; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11
-; WIN64-NEXT: # kill: def $r10d killed $r10d def $r10
-; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9
-; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8
-; WIN64-NEXT: # kill: def $edi killed $edi def $rdi
-; WIN64-NEXT: leal (%rdx,%rdi), %r13d
-; WIN64-NEXT: # kill: def $edx killed $edx killed $rdx
-; WIN64-NEXT: subl %edi, %edx
-; WIN64-NEXT: leal (%rsi,%r8), %ecx
-; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi
-; WIN64-NEXT: subl %r8d, %esi
-; WIN64-NEXT: leal (%r9,%r10), %r8d
-; WIN64-NEXT: movl %r9d, %ebp
-; WIN64-NEXT: subl %r10d, %ebp
-; WIN64-NEXT: movl %eax, %edi
-; WIN64-NEXT: movl %ebx, %r9d
-; WIN64-NEXT: subl %ebx, %edi
-; WIN64-NEXT: imull %edi, %ebp
-; WIN64-NEXT: leal (%r11,%r12), %edi
-; WIN64-NEXT: movl %r11d, %ebx
-; WIN64-NEXT: subl %r12d, %ebx
-; WIN64-NEXT: imull %edx, %ebx
-; WIN64-NEXT: addl %ebp, %ebx
-; WIN64-NEXT: leal (%r14,%r15), %edx
-; WIN64-NEXT: movl %r14d, %ebp
-; WIN64-NEXT: subl %r15d, %ebp
-; WIN64-NEXT: imull %esi, %ebp
-; WIN64-NEXT: addl %ebx, %ebp
-; WIN64-NEXT: addl %r9d, %eax
-; WIN64-NEXT: imull %r8d, %eax
-; WIN64-NEXT: imull %r13d, %edi
-; WIN64-NEXT: addl %edi, %eax
-; WIN64-NEXT: imull %ecx, %edx
-; WIN64-NEXT: addl %edx, %eax
-; WIN64-NEXT: addl %ebp, %eax
-; WIN64-NEXT: popq %rbx
-; WIN64-NEXT: popq %rbp
-; WIN64-NEXT: popq %r13
-; WIN64-NEXT: retq
-;
-; LINUXOSX-LABEL: testi32_inp:
-; LINUXOSX: # %bb.0:
-; LINUXOSX-NEXT: pushq %rbp
-; LINUXOSX-NEXT: pushq %rbx
-; LINUXOSX-NEXT: # kill: def $edx killed $edx def $rdx
-; LINUXOSX-NEXT: # kill: def $esi killed $esi def $rsi
-; LINUXOSX-NEXT: # kill: def $r14d killed $r14d def $r14
-; LINUXOSX-NEXT: # kill: def $r13d killed $r13d def $r13
-; LINUXOSX-NEXT: # kill: def $r12d killed $r12d def $r12
-; LINUXOSX-NEXT: # kill: def $r9d killed $r9d def $r9
-; LINUXOSX-NEXT: # kill: def $r8d killed $r8d def $r8
-; LINUXOSX-NEXT: # kill: def $edi killed $edi def $rdi
-; LINUXOSX-NEXT: leal (%rdx,%rdi), %r10d
-; LINUXOSX-NEXT: movl %edx, %ebp
-; LINUXOSX-NEXT: subl %edi, %ebp
-; LINUXOSX-NEXT: leal (%rsi,%r8), %r11d
-; LINUXOSX-NEXT: # kill: def $esi killed $esi killed $rsi
-; LINUXOSX-NEXT: subl %r8d, %esi
-; LINUXOSX-NEXT: leal (%r9,%r12), %r8d
-; LINUXOSX-NEXT: movl %r9d, %edi
-; LINUXOSX-NEXT: subl %r12d, %edi
-; LINUXOSX-NEXT: movl %eax, %edx
-; LINUXOSX-NEXT: subl %ecx, %edx
-; LINUXOSX-NEXT: imull %edx, %edi
-; LINUXOSX-NEXT: leal (%r13,%r14), %edx
-; LINUXOSX-NEXT: movl %r13d, %ebx
-; LINUXOSX-NEXT: subl %r14d, %ebx
-; LINUXOSX-NEXT: imull %ebp, %ebx
-; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %ebp
-; LINUXOSX-NEXT: addl %edi, %ebx
-; LINUXOSX-NEXT: movl %r15d, %edi
-; LINUXOSX-NEXT: subl %ebp, %edi
-; LINUXOSX-NEXT: imull %esi, %edi
-; LINUXOSX-NEXT: addl %ebx, %edi
-; LINUXOSX-NEXT: addl %ecx, %eax
-; LINUXOSX-NEXT: imull %r8d, %eax
-; LINUXOSX-NEXT: imull %r10d, %edx
-; LINUXOSX-NEXT: addl %edx, %eax
-; LINUXOSX-NEXT: addl %r15d, %ebp
-; LINUXOSX-NEXT: imull %r11d, %ebp
-; LINUXOSX-NEXT: addl %ebp, %eax
-; LINUXOSX-NEXT: addl %edi, %eax
-; LINUXOSX-NEXT: popq %rbx
-; LINUXOSX-NEXT: popq %rbp
-; LINUXOSX-NEXT: retq
i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
%x1 = sub i32 %a1, %a2
%x2 = sub i32 %a3, %a4
@@ -364,111 +136,53 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
ret i32 %r1
}
+; X32: testf32_stack
+; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; X32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; X32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+
+; LINUXOSX: testf32_stack
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: retq
+
; Test that parameters, overflowing register capacity, are passed through the stack
define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
-; WIN32-LABEL: testf32_stack:
-; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %ebp
-; WIN32-NEXT: movl %esp, %ebp
-; WIN32-NEXT: andl $-16, %esp
-; WIN32-NEXT: subl $48, %esp
-; WIN32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; WIN32-NEXT: movaps %xmm6, (%esp) # 16-byte Spill
-; WIN32-NEXT: movaps %xmm5, %xmm6
-; WIN32-NEXT: movaps %xmm4, %xmm5
-; WIN32-NEXT: movaps %xmm3, %xmm4
-; WIN32-NEXT: movaps %xmm2, %xmm3
-; WIN32-NEXT: movaps %xmm1, %xmm2
-; WIN32-NEXT: movaps %xmm0, %xmm1
-; WIN32-NEXT: movups 120(%ebp), %xmm7
-; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; WIN32-NEXT: addps %xmm7, %xmm0
-; WIN32-NEXT: movups 248(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm0
-; WIN32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; WIN32-NEXT: movups 104(%ebp), %xmm7
-; WIN32-NEXT: movaps (%esp), %xmm0 # 16-byte Reload
-; WIN32-NEXT: addps %xmm7, %xmm0
-; WIN32-NEXT: movups 232(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm0
-; WIN32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill
-; WIN32-NEXT: movups 88(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm6
-; WIN32-NEXT: movups 216(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm6
-; WIN32-NEXT: movups 72(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm5
-; WIN32-NEXT: movups 200(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm5
-; WIN32-NEXT: movups 56(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm4
-; WIN32-NEXT: movups 184(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm4
-; WIN32-NEXT: movups 40(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm3
-; WIN32-NEXT: movups 168(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm3
-; WIN32-NEXT: movups 24(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm2
-; WIN32-NEXT: movups 152(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm2
-; WIN32-NEXT: movups 8(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm1
-; WIN32-NEXT: movups 136(%ebp), %xmm7
-; WIN32-NEXT: addps %xmm7, %xmm1
-; WIN32-NEXT: movaps %xmm1, %xmm0
-; WIN32-NEXT: movaps %xmm2, %xmm1
-; WIN32-NEXT: movaps %xmm3, %xmm2
-; WIN32-NEXT: movaps %xmm4, %xmm3
-; WIN32-NEXT: movaps %xmm5, %xmm4
-; WIN32-NEXT: movaps %xmm6, %xmm5
-; WIN32-NEXT: movaps (%esp), %xmm6 # 16-byte Reload
-; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
-; WIN32-NEXT: movl %ebp, %esp
-; WIN32-NEXT: popl %ebp
-; WIN32-NEXT: retl
-;
-; WIN64-LABEL: testf32_stack:
-; WIN64: # %bb.0:
-; WIN64-NEXT: pushq %rax
-; WIN64-NEXT: addps %xmm15, %xmm7
-; WIN64-NEXT: addps %xmm14, %xmm6
-; WIN64-NEXT: addps %xmm13, %xmm5
-; WIN64-NEXT: addps %xmm12, %xmm4
-; WIN64-NEXT: addps %xmm11, %xmm3
-; WIN64-NEXT: addps %xmm10, %xmm2
-; WIN64-NEXT: addps %xmm9, %xmm1
-; WIN64-NEXT: addps %xmm8, %xmm0
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
-; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
-; WIN64-NEXT: popq %rax
-; WIN64-NEXT: retq
-;
-; LINUXOSX-LABEL: testf32_stack:
-; LINUXOSX: # %bb.0:
-; LINUXOSX-NEXT: addps %xmm15, %xmm7
-; LINUXOSX-NEXT: addps %xmm14, %xmm6
-; LINUXOSX-NEXT: addps %xmm13, %xmm5
-; LINUXOSX-NEXT: addps %xmm12, %xmm4
-; LINUXOSX-NEXT: addps %xmm11, %xmm3
-; LINUXOSX-NEXT: addps %xmm10, %xmm2
-; LINUXOSX-NEXT: addps %xmm9, %xmm1
-; LINUXOSX-NEXT: addps %xmm8, %xmm0
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
-; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
-; LINUXOSX-NEXT: retq
%x1 = fadd <32 x float> %a, %b
%x2 = fadd <32 x float> %x1, %c
ret <32 x float> %x2
diff --git a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index 7b4436735a9dc..a5f62dde81fe4 100644
--- a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
+++ b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,16 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "i686-unknown-linux-gnu"
define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: addps (%eax), %xmm0
-; CHECK-NEXT: retl
- %A = load <4 x float>, <4 x float>* %P, align 4
- %B = fadd <4 x float> %A, %In
- ret <4 x float> %B
+ %A = load <4 x float>, <4 x float>* %P, align 4
+ %B = fadd <4 x float> %A, %In
+ ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK: addps (%eax), %xmm0
}
diff --git a/llvm/test/CodeGen/X86/sse_reload_fold.ll b/llvm/test/CodeGen/X86/sse_reload_fold.ll
index f159b643bbdfc..fd8db3be10639 100644
--- a/llvm/test/CodeGen/X86/sse_reload_fold.ll
+++ b/llvm/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
+; CHECK: fail
+; CHECK-NOT: fail
declare float @test_f(float %f)
declare double @test_d(double %f)
@@ -26,312 +27,91 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
define float @foo(float %f) {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: callq test_f@PLT
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: sqrtss %xmm0, %xmm0
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call float @test_f(float %f)
%t = call float @llvm.sqrt.f32(float %f)
ret float %t
}
-
define double @doo(double %f) {
-; CHECK-LABEL: doo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: callq test_d@PLT
-; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: sqrtsd %xmm0, %xmm0
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call double @test_d(double %f)
%t = call double @llvm.sqrt.f64(double %f)
ret double %t
}
-
define <4 x float> @a0(<4 x float> %f) {
-; CHECK-LABEL: a0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: rsqrtps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @a1(<4 x float> %f) {
-; CHECK-LABEL: a1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: sqrtps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @a2(<4 x float> %f) {
-; CHECK-LABEL: a2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: rcpps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @b3(<4 x float> %f) {
-; CHECK-LABEL: b3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: minps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @b4(<4 x float> %f) {
-; CHECK-LABEL: b4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: maxps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @b5(<4 x float> %f) {
-; CHECK-LABEL: b5:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: cmpordps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
ret <4 x float> %t
}
-
define <4 x float> @b6(<4 x float> %f) {
-; CHECK-LABEL: b6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: addsubps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @b7(<4 x float> %f) {
-; CHECK-LABEL: b7:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq test_vf@PLT
-; CHECK-NEXT: haddps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
-
define <4 x float> @b8(<4 x float> %f) {
-; CHECK-LABEL: b8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vf@PLT
-; CHECK-NEXT: hsubps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
-
define <2 x double> @c1(<2 x double> %f) {
-; CHECK-LABEL: c1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: sqrtpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%a = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
ret <2 x double> %t
}
-
define <2 x double> @d3(<2 x double> %f) {
-; CHECK-LABEL: d3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: minpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
-
define <2 x double> @d4(<2 x double> %f) {
-; CHECK-LABEL: d4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: maxpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
-
define <2 x double> @d5(<2 x double> %f) {
-; CHECK-LABEL: d5:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: cmpordpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
ret <2 x double> %t
}
-
define <2 x double> @d6(<2 x double> %f) {
-; CHECK-LABEL: d6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: addsubpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
-
define <2 x double> @d7(<2 x double> %f) {
-; CHECK-LABEL: d7:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: haddpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
-
define <2 x double> @d8(<2 x double> %f) {
-; CHECK-LABEL: d8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: hsubpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
@@ -346,21 +126,8 @@ define <2 x double> @d8(<2 x double> %f) {
; addq $24, %rsp
; ret
; RABasic still tries to fold this one.
+
define <2 x double> @z0(<2 x double> %f) {
-; CHECK-LABEL: z0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    callq test_vd@PLT
-; CHECK-NEXT: movapd (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: hsubpd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
%y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
ret <2 x double> %t
More information about the llvm-commits
mailing list