[llvm] r301396 - [X86][SSE] Add test case for repeated vector insertions of the same element (PR15298)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 26 05:23:32 PDT 2017
Author: rksimon
Date: Wed Apr 26 07:23:32 2017
New Revision: 301396
URL: http://llvm.org/viewvc/llvm-project?rev=301396&view=rev
Log:
[X86][SSE] Add test case for repeated vector insertions of the same element (PR15298)
Added:
llvm/trunk/test/CodeGen/X86/insertelement-duplicates.ll
Added: llvm/trunk/test/CodeGen/X86/insertelement-duplicates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-duplicates.ll?rev=301396&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-duplicates.ll (added)
+++ llvm/trunk/test/CodeGen/X86/insertelement-duplicates.ll Wed Apr 26 07:23:32 2017
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64
+
+define void @PR15298(<4 x float>* nocapture %source, <8 x float>* nocapture %dest) nounwind noinline {
+; SSE-32-LABEL: PR15298:
+; SSE-32: # BB#0: # %L.entry
+; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: movaps 304(%ecx), %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; SSE-32-NEXT: movups %xmm1, 624(%eax)
+; SSE-32-NEXT: movups %xmm0, 608(%eax)
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: PR15298:
+; SSE-64: # BB#0: # %L.entry
+; SSE-64-NEXT: movaps 304(%rdi), %xmm0
+; SSE-64-NEXT: xorps %xmm1, %xmm1
+; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
+; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; SSE-64-NEXT: movups %xmm1, 624(%rsi)
+; SSE-64-NEXT: movups %xmm0, 608(%rsi)
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: PR15298:
+; AVX-32: # BB#0: # %L.entry
+; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX-32-NEXT: vbroadcastss 304(%ecx), %xmm0
+; AVX-32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
+; AVX-32-NEXT: vmovups %ymm0, 608(%eax)
+; AVX-32-NEXT: vzeroupper
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: PR15298:
+; AVX-64: # BB#0: # %L.entry
+; AVX-64-NEXT: vbroadcastss 304(%rdi), %xmm0
+; AVX-64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
+; AVX-64-NEXT: vmovups %ymm0, 608(%rsi)
+; AVX-64-NEXT: vzeroupper
+; AVX-64-NEXT: retq
+L.entry: ; body inserts one loaded scalar into two lanes (1 and 2) of an otherwise-zero <8 x float> and stores it
+ %0 = getelementptr inbounds <4 x float>, <4 x float>* %source, i32 19 ; element 19 of %source: 19 * 16 bytes = offset 304 in the expected asm
+ %1 = load <4 x float>, <4 x float>* %0, align 16 ; 16-byte aligned vector load
+ %2 = extractelement <4 x float> %1, i32 0 ; scalar x = lane 0 of the loaded vector
+ %3 = insertelement <8 x float> <float 0.000000e+00, float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %2, i32 2 ; base vector is zero except undef lanes 1 and 2; x goes into lane 2
+ %4 = insertelement <8 x float> %3, float %2, i32 1 ; same x into lane 1, giving <0, x, x, 0, 0, 0, 0, 0>
+ %5 = getelementptr <8 x float>, <8 x float>* %dest, i32 19 ; element 19 of %dest: 19 * 32 bytes = offset 608 in the expected asm (no inbounds here)
+ store <8 x float> %4, <8 x float>* %5, align 4 ; only 4-byte alignment is declared, matching the unaligned stores expected above
+ ret void
+}
More information about the llvm-commits
mailing list