[llvm] r265179 - [X86][SSE] Regenerated the vec_insert tests.

Fri Apr 1 12:42:23 PDT 2016

Author: rksimon
Date: Fri Apr  1 14:42:23 2016
New Revision: 265179

URL: http://llvm.org/viewvc/llvm-project?rev=265179&view=rev
Log:
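[X86][SSE] Regenerated the vec_insert tests with utils/update_llc_test_checks.py; all but vec_ins_extract.ll now carry FileCheck assertions for both 32-bit (X32) and 64-bit (X64) targets.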

Modified:
    llvm/trunk/test/CodeGen/X86/vec_ins_extract-1.ll
    llvm/trunk/test/CodeGen/X86/vec_ins_extract.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-3.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-4.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-5.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-7.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-8.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-9.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll

Modified: llvm/trunk/test/CodeGen/X86/vec_ins_extract-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ins_extract-1.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================

--- llvm/trunk/test/CodeGen/X86/vec_ins_extract-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ins_extract-1.ll Fri Apr  1 14:42:23 2016
@@ -1,24 +1,109 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep "(%esp,%eax,4)" | count 4
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
 
 ; Inserts and extracts with variable indices must be lowered
 ; to memory accesses.
 
 define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+; X32-LABEL: t0:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movl $76, (%esp,%eax,4)
+; X32-NEXT:    movl (%esp), %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: t0:
+; X64:       # BB#0:
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movl $76, -24(%rsp,%rax,4)
+; X64-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    retq
   %t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7
   %t9 = extractelement <4 x i32> %t13, i32 0
   ret i32 %t9
 }
+
 define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+; X32-LABEL: t1:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movl $76, %ecx
+; X32-NEXT:    pinsrd $0, %ecx, %xmm0
+; X32-NEXT:    movdqa %xmm0, (%esp)
+; X32-NEXT:    movl (%esp,%eax,4), %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: t1:
+; X64:       # BB#0:
+; X64-NEXT:    movl $76, %eax
+; X64-NEXT:    pinsrd $0, %eax, %xmm0
+; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movl -24(%rsp,%rax,4), %eax
+; X64-NEXT:    retq
   %t13 = insertelement <4 x i32> %t8, i32 76, i32 0
   %t9 = extractelement <4 x i32> %t13, i32 %t7
   ret i32 %t9
 }
+
 define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+; X32-LABEL: t2:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movdqa %xmm0, (%esp)
+; X32-NEXT:    pinsrd $0, (%esp,%eax,4), %xmm0
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: t2:
+; X64:       # BB#0:
+; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    pinsrd $0, -24(%rsp,%rax,4), %xmm0
+; X64-NEXT:    retq
   %t9 = extractelement <4 x i32> %t8, i32 %t7
   %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0
   ret <4 x i32> %t13
 }
+
 define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+; X32-LABEL: t3:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movd %xmm0, (%esp,%eax,4)
+; X32-NEXT:    movaps (%esp), %xmm0
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: t3:
+; X64:       # BB#0:
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movd %xmm0, -24(%rsp,%rax,4)
+; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT:    retq
   %t9 = extractelement <4 x i32> %t8, i32 0
   %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 %t7
   ret <4 x i32> %t13

Modified: llvm/trunk/test/CodeGen/X86/vec_ins_extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ins_extract.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ins_extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ins_extract.ll Fri Apr  1 14:42:23 2016
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: opt < %s -scalarrepl -instcombine | \
 ; RUN:   llc -march=x86 -mcpu=yonah | not grep sub.*esp
 

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-3.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-3.ll Fri Apr  1 14:42:23 2016
@@ -1,10 +1,23 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
 
 define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
-; CHECK-LABEL: t1:
-; CHECK:  punpcklqdq 
-; CHECK-NEXT:  retq 
-
+; X32-LABEL: t1:
+; X32:       # BB#0:
+; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: t1:
+; X64:       # BB#0:
+; X64-NEXT:    movd %rdi, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    retq
   %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
   ret <2 x i64> %tmp1
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-4.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-4.ll Fri Apr  1 14:42:23 2016
@@ -1,11 +1,40 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9.2.2"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-darwin9.2.2 -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9.2.2 -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
 
 define <8 x float> @f(<8 x float> %a, i32 %b) nounwind  {
+; X32-LABEL: f:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-32, %esp
+; X32-NEXT:    subl $64, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp)
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movl $1084227584, (%esp,%eax,4) ## imm = 0x40A00000
+; X32-NEXT:    movaps (%esp), %xmm0
+; X32-NEXT:    movaps {{[0-9]+}}(%esp), %xmm1
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: f:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    andq $-32, %rsp
+; X64-NEXT:    subq $64, %rsp
+; X64-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm0, (%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movl $1084227584, (%rsp,%rax,4) ## imm = 0x40A00000
+; X64-NEXT:    movaps (%rsp), %xmm0
+; X64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; X64-NEXT:    movq %rbp, %rsp
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
 entry:
-	%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b		; <<4 x float>> [#uses=1]
-	ret <8 x float> %vecins
+  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
+  ret <8 x float> %vecins
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-5.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-5.ll Fri Apr  1 14:42:23 2016
@@ -1,17 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X64
+
 ; There are no MMX operations in @t1
 
 define void  @t1(i32 %a, x86_mmx* %P) nounwind {
-; CHECK-LABEL: t1:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    shll $12, %ecx
-; CHECK-NEXT:    movd %ecx, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; CHECK-NEXT:    movq %xmm0, (%eax)
-; CHECK-NEXT:    retl
+; X32-LABEL: t1:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    shll $12, %ecx
+; X32-NEXT:    movd %ecx, %xmm0
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; X32-NEXT:    movq %xmm0, (%eax)
+; X32-NEXT:    retl
+;
+; X64-LABEL: t1:
+; X64:       # BB#0:
+; X64-NEXT:    shll $12, %edi
+; X64-NEXT:    movd %rdi, %xmm0
+; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    movq %xmm0, (%rsi)
+; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
@@ -21,87 +32,135 @@ define void  @t1(i32 %a, x86_mmx* %P) no
 }
 
 define <4 x float> @t2(<4 x float>* %P) nounwind {
-; CHECK-LABEL: t2:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movaps (%eax), %xmm1
-; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
-; CHECK-NEXT:    retl
+; X32-LABEL: t2:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movaps (%eax), %xmm1
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: t2:
+; X64:       # BB#0:
+; X64-NEXT:    movaps (%rdi), %xmm1
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; X64-NEXT:    retq
   %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
   ret <4 x float> %tmp2
 }
 
 define <4 x float> @t3(<4 x float>* %P) nounwind {
-; CHECK-LABEL: t3:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movapd (%eax), %xmm0
-; CHECK-NEXT:    xorpd %xmm1, %xmm1
-; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; CHECK-NEXT:    retl
+; X32-LABEL: t3:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movapd (%eax), %xmm0
+; X32-NEXT:    xorpd %xmm1, %xmm1
+; X32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X32-NEXT:    retl
+;
+; X64-LABEL: t3:
+; X64:       # BB#0:
+; X64-NEXT:    movapd (%rdi), %xmm0
+; X64-NEXT:    xorpd %xmm1, %xmm1
+; X64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X64-NEXT:    retq
   %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
   ret <4 x float> %tmp2
 }
 
 define <4 x float> @t4(<4 x float>* %P) nounwind {
-; CHECK-LABEL: t4:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movaps (%eax), %xmm0
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
-; CHECK-NEXT:    retl
+; X32-LABEL: t4:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movaps (%eax), %xmm0
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: t4:
+; X64:       # BB#0:
+; X64-NEXT:    movaps (%rdi), %xmm0
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; X64-NEXT:    retq
   %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
   ret <4 x float> %tmp2
 }
 
 define <16 x i8> @t5(<16 x i8> %x) nounwind {
-; CHECK-LABEL: t5:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    psrlw $8, %xmm0
-; CHECK-NEXT:    retl
+; X32-LABEL: t5:
+; X32:       # BB#0:
+; X32-NEXT:    psrlw $8, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: t5:
+; X64:       # BB#0:
+; X64-NEXT:    psrlw $8, %xmm0
+; X64-NEXT:    retq
   %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
   ret <16 x i8> %s
 }
 
 define <16 x i8> @t6(<16 x i8> %x) nounwind {
-; CHECK-LABEL: t6:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    psrlw $8, %xmm0
-; CHECK-NEXT:    retl
+; X32-LABEL: t6:
+; X32:       # BB#0:
+; X32-NEXT:    psrlw $8, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: t6:
+; X64:       # BB#0:
+; X64-NEXT:    psrlw $8, %xmm0
+; X64-NEXT:    retq
   %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i8> %s
 }
 
 define <16 x i8> @t7(<16 x i8> %x) nounwind {
-; CHECK-LABEL: t7:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; CHECK-NEXT:    retl
+; X32-LABEL: t7:
+; X32:       # BB#0:
+; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT:    retl
+;
+; X64-LABEL: t7:
+; X64:       # BB#0:
+; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X64-NEXT:    retq
   %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
   ret <16 x i8> %s
 }
 
 define <16 x i8> @t8(<16 x i8> %x) nounwind {
-; CHECK-LABEL: t8:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
-; CHECK-NEXT:    retl
+; X32-LABEL: t8:
+; X32:       # BB#0:
+; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: t8:
+; X64:       # BB#0:
+; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; X64-NEXT:    retq
   %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
   ret <16 x i8> %s
 }
 
 define <16 x i8> @t9(<16 x i8> %x) nounwind {
-; CHECK-LABEL: t9:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
-; CHECK-NEXT:    retl
+; X32-LABEL: t9:
+; X32:       # BB#0:
+; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: t9:
+; X64:       # BB#0:
+; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; X64-NEXT:    retq
   %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
   ret <16 x i8> %s
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-7.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-7.ll Fri Apr  1 14:42:23 2016
@@ -1,26 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=i686-apple-darwin9 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+mmx,+sse4.2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+mmx,+sse4.2 | FileCheck %s --check-prefix=X64
 
 ; MMX insertelement is not available; these are promoted to XMM.
 ; (Without SSE they are split to two ints, and the code is much better.)
 
 define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
-; CHECK-LABEL: mmx_movzl:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    subl $20, %esp
-; CHECK-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; CHECK-NEXT:    movl $32, %eax
-; CHECK-NEXT:    pinsrd $0, %eax, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; CHECK-NEXT:    movq %xmm1, (%esp)
-; CHECK-NEXT:    movq (%esp), %mm0
-; CHECK-NEXT:    addl $20, %esp
-; CHECK-NEXT:    retl
+; X32-LABEL: mmx_movzl:
+; X32:       ## BB#0:
+; X32-NEXT:    subl $20, %esp
+; X32-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    pinsrd $0, %eax, %xmm0
+; X32-NEXT:    pxor %xmm1, %xmm1
+; X32-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; X32-NEXT:    movq %xmm1, (%esp)
+; X32-NEXT:    movq (%esp), %mm0
+; X32-NEXT:    addl $20, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: mmx_movzl:
+; X64:       ## BB#0:
+; X64-NEXT:    movdq2q %xmm0, %mm0
+; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    pinsrq $0, %rax, %xmm1
+; X64-NEXT:    pxor %xmm0, %xmm0
+; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; X64-NEXT:    retq
   %tmp = bitcast x86_mmx %x to <2 x i32>
-  %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0		; <<2 x i32>> [#uses=1]
-  %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1		; <<2 x i32>> [#uses=1]
+  %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
+  %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1
   %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
   ret x86_mmx %tmp9
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-8.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-8.ll Fri Apr  1 14:42:23 2016
@@ -1,15 +1,58 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
 
 ; tests variable insert and extract of a 4 x i32
 
-define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
+define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
+; X32-LABEL: var_insert:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    movl 12(%ebp), %ecx
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movl %eax, (%esp,%ecx,4)
+; X32-NEXT:    movaps (%esp), %xmm0
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: var_insert:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movl %edi, -24(%rsp,%rax,4)
+; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT:    retq
 entry:
-	%tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %tmp3
+  %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx
+  ret <4 x i32> %tmp3
 }
 
-define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind  {
+define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind {
+; X32-LABEL: var_extract:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    movaps %xmm0, (%esp)
+; X32-NEXT:    movl (%esp,%eax,4), %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: var_extract:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movl -24(%rsp,%rax,4), %eax
+; X64-NEXT:    retq
 entry:
-	%tmp3 = extractelement <4 x i32> %x, i32 %idx		; <<i32>> [#uses=1]
-	ret i32 %tmp3
+  %tmp3 = extractelement <4 x i32> %x, i32 %idx
+  ret i32 %tmp3
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-9.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-9.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-9.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-9.ll Fri Apr  1 14:42:23 2016
@@ -1,9 +1,21 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t
-; RUN: grep pinsrd %t | count 1
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
 
 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
+; X32-LABEL: var_insert2:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: var_insert2:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movd %edi, %xmm0
+; X64-NEXT:    pinsrd $3, %esi, %xmm0
+; X64-NEXT:    retq
 entry:
-	%tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0		; <<4 x i32>> [#uses=1]
-	%tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %tmp4
+  %tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0
+  %tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3
+  ret <4 x i32> %tmp4
 }

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll?rev=265179&r1=265178&r2=265179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll Fri Apr  1 14:42:23 2016
@@ -1,37 +1,55 @@
-; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32
-; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s -check-prefix=X86-64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s --check-prefix=X64
 
 ; This is not an MMX operation; promoted to XMM.
 define x86_mmx @t0(i32 %A) nounwind {
-; X86-32-LABEL: t0:
-; X86-32:       ## BB#0:
-; X86-32:    movd {{[0-9]+}}(%esp), %xmm0
-; X86-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; X86-32-NEXT:    movq %xmm0, (%esp)
-; X86-32-NEXT:    movq (%esp), %mm0
-; X86-32-NEXT:    addl $12, %esp
-; X86-32-NEXT:    retl
+; X32-LABEL: t0:
+; X32:       ## BB#0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; X32-NEXT:    movq %xmm0, (%esp)
+; X32-NEXT:    movq (%esp), %mm0
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: t0:
+; X64:       ## BB#0:
+; X64-NEXT:    movd %rdi, %xmm0
+; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    retq
   %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
   %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
   ret x86_mmx %tmp4
 }
 
 define <8 x i8> @t1(i8 zeroext %x) nounwind {
-; X86-32-LABEL: t1:
-; X86-32:       ## BB#0:
-; X86-32-NOT:  movl
-; X86-32-NEXT:    movd {{[0-9]+}}(%esp), %xmm0
-; X86-32-NEXT:    retl
+; X32-LABEL: t1:
+; X32:       ## BB#0:
+; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: t1:
+; X64:       ## BB#0:
+; X64-NEXT:    movd %edi, %xmm0
+; X64-NEXT:    retq
   %r = insertelement <8 x i8> undef, i8 %x, i32 0
   ret <8 x i8> %r
 }
 
 ; PR2574
 define <2 x float> @t2(<2 x float> %a0) {
-; X86-32-LABEL: t2:
-; X86-32:       ## BB#0:
-; X86-32-NEXT:    xorps %xmm0, %xmm0
-; X86-32-NEXT:    retl
+; X32-LABEL: t2:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: t2:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    retq
   %v1 = insertelement <2 x float> %a0, float 0.000000e+00, i32 0
   %v2 = insertelement <2 x float> %v1, float 0.000000e+00, i32 1
   ret <2 x float> %v2
@@ -42,14 +60,31 @@ define <2 x float> @t2(<2 x float> %a0)
 
 ; PR2562
 define void @t3() {
-; X86-64-LABEL: t3:
-; X86-64:       ## BB#0:
-; X86-64:    pmovzxwd (%rcx)
-; X86-64-NEXT:    movzwl
-; X86-64-NEXT:    pinsrd $0
-; X86-64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X86-64-NEXT:    movq %xmm0
-; X86-64-NEXT:    retq
+; X32-LABEL: t3:
+; X32:       ## BB#0:
+; X32-NEXT:    movl L_g0$non_lazy_ptr, %eax
+; X32-NEXT:    movl L_g1$non_lazy_ptr, %ecx
+; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-NEXT:    movzwl (%eax), %eax
+; X32-NEXT:    movd %eax, %xmm1
+; X32-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X32-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT:    movq %xmm0, (%ecx)
+; X32-NEXT:    retl
+;
+; X64-LABEL: t3:
+; X64:       ## BB#0:
+; X64-NEXT:    movq _g0@{{.*}}(%rip), %rax
+; X64-NEXT:    movq _g1@{{.*}}(%rip), %rcx
+; X64-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-NEXT:    movzwl (%rax), %eax
+; X64-NEXT:    pinsrd $0, %eax, %xmm0
+; X64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X64-NEXT:    movq %xmm0, (%rcx)
+; X64-NEXT:    retq
   load i16, i16* @g0
   load <4 x i16>, <4 x i16>* @g1
   insertelement <4 x i16> %2, i16 %1, i32 0