[llvm] r265183 - [X86][SSE] Regenerated the vec_extract tests.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 1 13:55:20 PDT 2016
Author: rksimon
Date: Fri Apr 1 15:55:19 2016
New Revision: 265183
URL: http://llvm.org/viewvc/llvm-project?rev=265183&view=rev
Log:
[X86][SSE] Regenerated the vec_extract tests.
Modified:
llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll
llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll
llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
llvm/trunk/test/CodeGen/X86/vec_extract.ll
Modified: llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll?rev=265183&r1=265182&r2=265183&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll Fri Apr 1 15:55:19 2016
@@ -1,36 +1,108 @@
-; RUN: llc < %s -march=x86-64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
define <8 x i32> @a(<8 x i32> %a) nounwind {
+; SSE-LABEL: a:
+; SSE: # BB#0:
+; SSE-NEXT: pslld $16, %xmm0
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: pslld $16, %xmm1
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: a:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
+; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: a:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
+; AVX2-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
define <3 x i32> @b(<3 x i32> %a) nounwind {
+; SSE-LABEL: b:
+; SSE: # BB#0:
+; SSE-NEXT: pslld $16, %xmm0
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: b:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX-NEXT: retq
%b = trunc <3 x i32> %a to <3 x i16>
%c = sext <3 x i16> %b to <3 x i32>
ret <3 x i32> %c
}
define <1 x i32> @c(<1 x i32> %a) nounwind {
+; ALL-LABEL: c:
+; ALL: # BB#0:
+; ALL-NEXT: movswl %di, %eax
+; ALL-NEXT: retq
%b = trunc <1 x i32> %a to <1 x i16>
%c = sext <1 x i16> %b to <1 x i32>
ret <1 x i32> %c
}
define <8 x i32> @d(<8 x i32> %a) nounwind {
+; SSE-LABEL: d:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: d:
+; AVX1: # BB#0:
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: d:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = zext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
define <3 x i32> @e(<3 x i32> %a) nounwind {
+; SSE-LABEL: e:
+; SSE: # BB#0:
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: e:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
+; AVX-NEXT: retq
%b = trunc <3 x i32> %a to <3 x i16>
%c = zext <3 x i16> %b to <3 x i32>
ret <3 x i32> %c
}
define <1 x i32> @f(<1 x i32> %a) nounwind {
+; ALL-LABEL: f:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: retq
%b = trunc <1 x i32> %a to <1 x i16>
%c = zext <1 x i16> %b to <1 x i32>
ret <1 x i32> %c
Modified: llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll?rev=265183&r1=265182&r2=265183&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll Fri Apr 1 15:55:19 2016
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
@@ -8,11 +9,18 @@
; Extracting the low elements only requires using the right kind of store.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
-; CHECK-LABEL: low_v8f32_to_v4f32:
-; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps %xmm0, (%rdi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: low_v8f32_to_v4f32:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: low_v8f32_to_v4f32:
+; X64: # BB#0:
+; X64-NEXT: vmovaps %xmm0, (%rdi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 0
%ext1 = extractelement <8 x float> %v, i32 1
%ext2 = extractelement <8 x float> %v, i32 2
@@ -27,11 +35,18 @@ define void @low_v8f32_to_v4f32(<8 x flo
; Extracting the high elements requires just one AVX instruction.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
-; CHECK-LABEL: high_v8f32_to_v4f32:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: high_v8f32_to_v4f32:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: high_v8f32_to_v4f32:
+; X64: # BB#0:
+; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 4
%ext1 = extractelement <8 x float> %v, i32 5
%ext2 = extractelement <8 x float> %v, i32 6
@@ -48,11 +63,18 @@ define void @high_v8f32_to_v4f32(<8 x fl
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
-; CHECK-LABEL: high_v8i32_to_v4i32:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: high_v8i32_to_v4i32:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: high_v8i32_to_v4i32:
+; X64: # BB#0:
+; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ext0 = extractelement <8 x i32> %v, i32 4
%ext1 = extractelement <8 x i32> %v, i32 5
%ext2 = extractelement <8 x i32> %v, i32 6
@@ -67,11 +89,18 @@ define void @high_v8i32_to_v4i32(<8 x i3
; Make sure that element size doesn't alter the codegen.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
-; CHECK-LABEL: high_v4f64_to_v2f64:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: high_v4f64_to_v2f64:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: high_v4f64_to_v2f64:
+; X64: # BB#0:
+; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ext0 = extractelement <4 x double> %v, i32 2
%ext1 = extractelement <4 x double> %v, i32 3
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@@ -84,14 +113,25 @@ define void @high_v4f64_to_v2f64(<4 x do
; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
-; CHECK-LABEL: legal_vzmovl_2i32_8i32:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: legal_vzmovl_2i32_8i32:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; X32-NEXT: vmovaps %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: legal_vzmovl_2i32_8i32:
+; X64: # BB#0:
+; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; X64-NEXT: vmovaps %ymm0, (%rsi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ld = load <2 x i32>, <2 x i32>* %in, align 8
%ext = extractelement <2 x i32> %ld, i64 0
%ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
@@ -100,14 +140,25 @@ define void @legal_vzmovl_2i32_8i32(<2 x
}
define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
-; CHECK-LABEL: legal_vzmovl_2i64_4i64:
-; CHECK: # BB#0:
-; CHECK-NEXT: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: legal_vzmovl_2i64_4i64:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovupd (%ecx), %xmm0
+; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X32-NEXT: vmovapd %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: legal_vzmovl_2i64_4i64:
+; X64: # BB#0:
+; X64-NEXT: vmovupd (%rdi), %xmm0
+; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X64-NEXT: vmovapd %ymm0, (%rsi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ld = load <2 x i64>, <2 x i64>* %in, align 8
%ext = extractelement <2 x i64> %ld, i64 0
%ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
@@ -116,14 +167,23 @@ define void @legal_vzmovl_2i64_4i64(<2 x
}
define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
-; CHECK-LABEL: legal_vzmovl_2f32_8f32:
-; CHECK: # BB#0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: legal_vzmovl_2f32_8f32:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovaps %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: legal_vzmovl_2f32_8f32:
+; X64: # BB#0:
+; X64-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; X64-NEXT: vmovaps %ymm0, (%rsi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ld = load <2 x float>, <2 x float>* %in, align 8
%ext = extractelement <2 x float> %ld, i64 0
%ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
@@ -132,14 +192,25 @@ define void @legal_vzmovl_2f32_8f32(<2 x
}
define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
-; CHECK-LABEL: legal_vzmovl_2f64_4f64:
-; CHECK: # BB#0:
-; CHECK-NEXT: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; X32-LABEL: legal_vzmovl_2f64_4f64:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovupd (%ecx), %xmm0
+; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X32-NEXT: vmovapd %ymm0, (%eax)
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: legal_vzmovl_2f64_4f64:
+; X64: # BB#0:
+; X64-NEXT: vmovupd (%rdi), %xmm0
+; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X64-NEXT: vmovapd %ymm0, (%rsi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%ld = load <2 x double>, <2 x double>* %in, align 8
%ext = extractelement <2 x double> %ld, i64 0
%ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0
Modified: llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll?rev=265183&r1=265182&r2=265183&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll Fri Apr 1 15:55:19 2016
@@ -1,12 +1,35 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
-define i32 @test0(<1 x i64>* %v4) {
-; CHECK-LABEL: test0:
-; CHECK: # BB#0:{{.*}} %entry
-; CHECK: pshufw $238, (%[[REG:[a-z]+]]), %mm0
-; CHECK-NEXT: movd %mm0, %eax
-; CHECK-NEXT: addl $32, %eax
-; CHECK-NEXT: retq
+define i32 @test0(<1 x i64>* %v4) nounwind {
+; X32-LABEL: test0:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $24, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: pshufw $238, (%esp), %mm0 # mm0 = mem[2,3,2,3]
+; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: addl $32, %eax
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: test0:
+; X64: # BB#0: # %entry
+; X64-NEXT: pshufw $238, (%rdi), %mm0 # mm0 = mem[2,3,2,3]
+; X64-NEXT: movd %mm0, %eax
+; X64-NEXT: addl $32, %eax
+; X64-NEXT: retq
entry:
%v5 = load <1 x i64>, <1 x i64>* %v4, align 8
%v12 = bitcast <1 x i64> %v5 to <4 x i16>
@@ -21,14 +44,32 @@ entry:
ret i32 %v20
}
-define i32 @test1(i32* nocapture readonly %ptr) {
-; CHECK-LABEL: test1:
-; CHECK: # BB#0:{{.*}} %entry
-; CHECK: movd (%[[REG]]), %mm0
-; CHECK-NEXT: pshufw $232, %mm0, %mm0
-; CHECK-NEXT: movd %mm0, %eax
-; CHECK-NEXT: emms
-; CHECK-NEXT: retq
+define i32 @test1(i32* nocapture readonly %ptr) nounwind {
+; X32-LABEL: test1:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movd (%eax), %mm0
+; X32-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
+; X32-NEXT: movq %mm0, (%esp)
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: emms
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: test1:
+; X64: # BB#0: # %entry
+; X64-NEXT: movd (%rdi), %mm0
+; X64-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
+; X64-NEXT: movd %mm0, %eax
+; X64-NEXT: emms
+; X64-NEXT: retq
entry:
%0 = load i32, i32* %ptr, align 4
%1 = insertelement <2 x i32> undef, i32 %0, i32 0
@@ -47,13 +88,30 @@ entry:
ret i32 %12
}
-define i32 @test2(i32* nocapture readonly %ptr) {
-; CHECK-LABEL: test2:
-; CHECK: # BB#0:{{.*}} %entry
-; CHECK: pshufw $232, (%[[REG]]), %mm0
-; CHECK-NEXT: movd %mm0, %eax
-; CHECK-NEXT: emms
-; CHECK-NEXT: retq
+define i32 @test2(i32* nocapture readonly %ptr) nounwind {
+; X32-LABEL: test2:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: pshufw $232, (%eax), %mm0 # mm0 = mem[0,2,2,3]
+; X32-NEXT: movq %mm0, (%esp)
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: emms
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: # BB#0: # %entry
+; X64-NEXT: pshufw $232, (%rdi), %mm0 # mm0 = mem[0,2,2,3]
+; X64-NEXT: movd %mm0, %eax
+; X64-NEXT: emms
+; X64-NEXT: retq
entry:
%0 = bitcast i32* %ptr to x86_mmx*
%1 = load x86_mmx, x86_mmx* %0, align 8
Modified: llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll?rev=265183&r1=265182&r2=265183&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll Fri Apr 1 15:55:19 2016
@@ -1,59 +1,79 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
define void @t1(float* %R, <4 x float>* %P1) nounwind {
-; CHECK-LABEL: t1:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movss 12(%ecx), %xmm0
-; CHECK-NEXT: movss %xmm0, (%eax)
-; CHECK-NEXT: retl
-
- %X = load <4 x float>, <4 x float>* %P1
- %tmp = extractelement <4 x float> %X, i32 3
- store float %tmp, float* %R
- ret void
+; X32-LABEL: t1:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: t1:
+; X64: # BB#0:
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss %xmm0, (%rdi)
+; X64-NEXT: retq
+ %X = load <4 x float>, <4 x float>* %P1
+ %tmp = extractelement <4 x float> %X, i32 3
+ store float %tmp, float* %R
+ ret void
}
define float @t2(<4 x float>* %P1) nounwind {
-; CHECK-LABEL: t2:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
-; CHECK-NEXT: movss %xmm0, (%esp)
-; CHECK-NEXT: flds (%esp)
-; CHECK-NEXT: popl %eax
-; CHECK-NEXT: retl
-
- %X = load <4 x float>, <4 x float>* %P1
- %tmp = extractelement <4 x float> %X, i32 2
- ret float %tmp
+; X32-LABEL: t2:
+; X32: # BB#0:
+; X32-NEXT: pushl %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT: movss %xmm0, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: t2:
+; X64: # BB#0:
+; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: retq
+ %X = load <4 x float>, <4 x float>* %P1
+ %tmp = extractelement <4 x float> %X, i32 2
+ ret float %tmp
}
define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
-; CHECK-LABEL: t3:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%ecx), %ecx
-; CHECK-NEXT: movl %ecx, (%eax)
-; CHECK-NEXT: retl
-
- %X = load <4 x i32>, <4 x i32>* %P1
- %tmp = extractelement <4 x i32> %X, i32 3
- store i32 %tmp, i32* %R
- ret void
+; X32-LABEL: t3:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 12(%ecx), %ecx
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: t3:
+; X64: # BB#0:
+; X64-NEXT: movl 12(%rsi), %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: retq
+ %X = load <4 x i32>, <4 x i32>* %P1
+ %tmp = extractelement <4 x i32> %X, i32 3
+ store i32 %tmp, i32* %R
+ ret void
}
define i32 @t4(<4 x i32>* %P1) nounwind {
-; CHECK-LABEL: t4:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl 12(%eax), %eax
-; CHECK-NEXT: retl
-
- %X = load <4 x i32>, <4 x i32>* %P1
- %tmp = extractelement <4 x i32> %X, i32 3
- ret i32 %tmp
+; X32-LABEL: t4:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 12(%eax), %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: t4:
+; X64: # BB#0:
+; X64-NEXT: movl 12(%rdi), %eax
+; X64-NEXT: retq
+ %X = load <4 x i32>, <4 x i32>* %P1
+ %tmp = extractelement <4 x i32> %X, i32 3
+ ret i32 %tmp
}
Modified: llvm/trunk/test/CodeGen/X86/vec_extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract.ll?rev=265183&r1=265182&r2=265183&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract.ll Fri Apr 1 15:55:19 2016
@@ -1,74 +1,104 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
define void @test1(<4 x float>* %F, float* %f) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movaps (%ecx), %xmm0
-; CHECK-NEXT: addps %xmm0, %xmm0
-; CHECK-NEXT: movss %xmm0, (%eax)
-; CHECK-NEXT: retl
+; X32-LABEL: test1:
+; X32: # BB#0: # %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movaps (%ecx), %xmm0
+; X32-NEXT: addps %xmm0, %xmm0
+; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test1:
+; X64: # BB#0: # %entry
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: addps %xmm0, %xmm0
+; X64-NEXT: movss %xmm0, (%rsi)
+; X64-NEXT: retq
entry:
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- %tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
- store float %tmp2, float* %f
- ret void
+ %tmp = load <4 x float>, <4 x float>* %F
+ %tmp7 = fadd <4 x float> %tmp, %tmp
+ %tmp2 = extractelement <4 x float> %tmp7, i32 0
+ store float %tmp2, float* %f
+ ret void
}
define float @test2(<4 x float>* %F, float* %f) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movaps (%eax), %xmm0
-; CHECK-NEXT: addps %xmm0, %xmm0
-; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: movss %xmm0, (%esp)
-; CHECK-NEXT: flds (%esp)
-; CHECK-NEXT: popl %eax
-; CHECK-NEXT: retl
+; X32-LABEL: test2:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movaps (%eax), %xmm0
+; X32-NEXT: addps %xmm0, %xmm0
+; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X32-NEXT: movss %xmm0, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: # BB#0: # %entry
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: addps %xmm0, %xmm0
+; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-NEXT: retq
entry:
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- %tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
- ret float %tmp2
+ %tmp = load <4 x float>, <4 x float>* %F
+ %tmp7 = fadd <4 x float> %tmp, %tmp
+ %tmp2 = extractelement <4 x float> %tmp7, i32 2
+ ret float %tmp2
}
define void @test3(float* %R, <4 x float>* %P1) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movss 12(%ecx), %xmm0
-; CHECK-NEXT: movss %xmm0, (%eax)
-; CHECK-NEXT: retl
+; X32-LABEL: test3:
+; X32: # BB#0: # %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test3:
+; X64: # BB#0: # %entry
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss %xmm0, (%rdi)
+; X64-NEXT: retq
entry:
- %X = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
- store float %tmp, float* %R
- ret void
+ %X = load <4 x float>, <4 x float>* %P1
+ %tmp = extractelement <4 x float> %X, i32 3
+ store float %tmp, float* %R
+ ret void
}
define double @test4(double %A) nounwind {
-; CHECK-LABEL: test4:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: calll foo
-; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
-; CHECK-NEXT: movsd %xmm0, (%esp)
-; CHECK-NEXT: fldl (%esp)
-; CHECK-NEXT: addl $12, %esp
-; CHECK-NEXT: retl
+; X32-LABEL: test4:
+; X32: # BB#0: # %entry
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: calll foo
+; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X32-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT: movsd %xmm0, (%esp)
+; X32-NEXT: fldl (%esp)
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: test4:
+; X64: # BB#0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; X64-NEXT: callq foo
+; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
+; X64-NEXT: popq %rax
+; X64-NEXT: retq
entry:
- %tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
- %tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
- %tmp3 = fadd double %tmp2, %A ; <double> [#uses=1]
- ret double %tmp3
+ %tmp1 = call <2 x double> @foo( )
+ %tmp2 = extractelement <2 x double> %tmp1, i32 1
+ %tmp3 = fadd double %tmp2, %A
+ ret double %tmp3
}
declare <2 x double> @foo()
More information about the llvm-commits mailing list