[llvm] r272577 - [X86][SSE] Added extract to scalar nontemporal store tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 13 12:08:28 PDT 2016
Author: rksimon
Date: Mon Jun 13 14:08:28 2016
New Revision: 272577
URL: http://llvm.org/viewvc/llvm-project?rev=272577&view=rev
Log:
[X86][SSE] Added extract to scalar nontemporal store tests
Modified:
llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=272577&r1=272576&r2=272577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Mon Jun 13 14:08:28 2016
@@ -461,6 +461,137 @@ define void @test_arg_i64(i64 %arg, i64*
ret void
}
+; Extract versions
+
+; Extracts element 1 of a <4 x float> argument and stores that scalar to %dst
+; with the !nontemporal hint (align 1). The autogenerated expectations below
+; pin the instruction sequence for each check prefix; as recorded here, the
+; SSE2 and SSE4A sequences end in movss while the SSE41, AVX and VLX sequences
+; end in movntil.
+define void @test_extract_f32(<4 x float> %arg, float* %dst) {
+; SSE2-LABEL: test_extract_f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movss %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_extract_f32:
+; SSE4A: # BB#0:
+; SSE4A-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE4A-NEXT: movss %xmm0, (%rdi)
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_extract_f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: extractps $1, %xmm0, %eax
+; SSE41-NEXT: movntil %eax, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_extract_f32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractps $1, %xmm0, %eax
+; AVX-NEXT: movntil %eax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_extract_f32:
+; VLX: # BB#0:
+; VLX-NEXT: vextractps $1, %xmm0, %eax
+; VLX-NEXT: movntil %eax, (%rdi)
+; VLX-NEXT: retq
+ %1 = extractelement <4 x float> %arg, i32 1
+ store float %1, float* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+; Extracts element 1 of a <4 x i32> argument and stores that scalar to %dst
+; with the !nontemporal hint (align 1). In every recorded configuration the
+; element is first moved into %eax (pshufd + movd, or pextrd / vpextrd) and
+; then written with movntil.
+define void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
+; SSE2-LABEL: test_extract_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movntil %eax, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_extract_i32:
+; SSE4A: # BB#0:
+; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE4A-NEXT: movd %xmm0, %eax
+; SSE4A-NEXT: movntil %eax, (%rdi)
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_extract_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: movntil %eax, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_extract_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: movntil %eax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_extract_i32:
+; VLX: # BB#0:
+; VLX-NEXT: vpextrd $1, %xmm0, %eax
+; VLX-NEXT: movntil %eax, (%rdi)
+; VLX-NEXT: retq
+ %1 = extractelement <4 x i32> %arg, i32 1
+ store i32 %1, i32* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+; Extracts element 1 of a <2 x double> argument and stores that scalar to %dst
+; with the !nontemporal hint (align 1). Every recorded configuration expects a
+; single movhpd / vmovhpd store straight from %xmm0; no movnt* instruction
+; appears in these expectations.
+define void @test_extract_f64(<2 x double> %arg, double* %dst) {
+; SSE-LABEL: test_extract_f64:
+; SSE: # BB#0:
+; SSE-NEXT: movhpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_extract_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_extract_f64:
+; VLX: # BB#0:
+; VLX-NEXT: vmovhpd %xmm0, (%rdi)
+; VLX-NEXT: retq
+ %1 = extractelement <2 x double> %arg, i32 1
+ store double %1, double* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+; Extracts element 1 of a <2 x i64> argument and stores that scalar to %dst
+; with the !nontemporal hint (align 1). In every recorded configuration the
+; element is first moved into %rax (pshufd + movd, or pextrq / vpextrq) and
+; then written with movntiq.
+define void @test_extract_i64(<2 x i64> %arg, i64* %dst) {
+; SSE2-LABEL: test_extract_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: movntiq %rax, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_extract_i64:
+; SSE4A: # BB#0:
+; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE4A-NEXT: movd %xmm0, %rax
+; SSE4A-NEXT: movntiq %rax, (%rdi)
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_extract_i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrq $1, %xmm0, %rax
+; SSE41-NEXT: movntiq %rax, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_extract_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: movntiq %rax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_extract_i64:
+; VLX: # BB#0:
+; VLX-NEXT: vpextrq $1, %xmm0, %rax
+; VLX-NEXT: movntiq %rax, (%rdi)
+; VLX-NEXT: retq
+ %1 = extractelement <2 x i64> %arg, i32 1
+ store i64 %1, i64* %dst, align 1, !nontemporal !1
+ ret void
+}
+
; And now XMM versions.
define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
More information about the llvm-commits mailing list