[llvm] r263640 - [X86] Regenerated + extended widened vector conversion tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 16 08:33:43 PDT 2016


Author: rksimon
Date: Wed Mar 16 10:33:43 2016
New Revision: 263640

URL: http://llvm.org/viewvc/llvm-project?rev=263640&view=rev
Log:
[X86] Regenerated + extended widened vector conversion tests
- Ensure we test X86 + X64
- sitofp / uitofp require testing for SSE2 and SSE42 as well (part of the fix for PR26953)
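
As the NOTE line in each test indicates, the CHECK assertions are autogenerated. A minimal sketch of the regeneration step (assuming a built llc on PATH and running from the llvm source root; exact flags may differ):

    python utils/update_llc_test_checks.py test/CodeGen/X86/widen_conv-1.ll

The script runs each RUN line, captures the current llc output, and rewrites the FileCheck assertions per --check-prefix, falling back to a shared prefix (e.g. X86 or X64) where the output is identical across RUN lines.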

Modified:
    llvm/trunk/test/CodeGen/X86/widen_conv-1.ll
    llvm/trunk/test/CodeGen/X86/widen_conv-2.ll
    llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
    llvm/trunk/test/CodeGen/X86/widen_conv-4.ll

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-1.ll?rev=263640&r1=263639&r2=263640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-1.ll Wed Mar 16 10:33:43 2016
@@ -1,12 +1,101 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: paddd
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
 
 ; truncate v2i64 to v2i32
 
-define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
+define void @convert_v2i64_to_v2i32(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
+; X86-LABEL: convert_v2i64_to_v2i32:
+; X86:       # BB#0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    paddd .LCPI0_0, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT:    movq %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: convert_v2i64_to_v2i32:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    movq %xmm0, (%rdi)
+; X64-NEXT:    retq
 entry:
 	%val = trunc <2 x i64> %src to <2 x i32>
 	%add = add <2 x i32> %val, < i32 1, i32 1 >
 	store <2 x i32> %add, <2 x i32>* %dst.addr
 	ret void
 }
+
+; truncate v3i32 to v3i8
+
+define void @convert_v3i32_to_v3i8(<3 x i8>* %dst.addr, <3 x i32>* %src.addr) nounwind {
+; X86-LABEL: convert_v3i32_to_v3i8:
+; X86:       # BB#0: # %entry
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movdqa (%ecx), %xmm0
+; X86-NEXT:    paddd .LCPI1_0, %xmm0
+; X86-NEXT:    pextrb $8, %xmm0, 2(%eax)
+; X86-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X86-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; X86-NEXT:    movd %xmm0, %ecx
+; X86-NEXT:    movw %cx, (%eax)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: convert_v3i32_to_v3i8:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movdqa (%rsi), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    pextrb $8, %xmm0, 2(%rdi)
+; X64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    movw %ax, (%rdi)
+; X64-NEXT:    retq
+entry:
+	%load = load <3 x i32>, <3 x i32>* %src.addr
+	%val = trunc <3 x i32> %load to <3 x i8>
+	%add = add <3 x i8> %val, < i8 1, i8 1, i8 1 >
+	store <3 x i8> %add, <3 x i8>* %dst.addr
+	ret void
+}
+
+; truncate v5i16 to v5i8
+
+define void @convert_v5i16_to_v5i8(<5 x i8>* %dst.addr, <5 x i16>* %src.addr) nounwind {
+; X86-LABEL: convert_v5i16_to_v5i8:
+; X86:       # BB#0: # %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl 12(%ebp), %ecx
+; X86-NEXT:    movdqa (%ecx), %xmm0
+; X86-NEXT:    paddw .LCPI2_0, %xmm0
+; X86-NEXT:    pextrb $8, %xmm0, 4(%eax)
+; X86-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; X86-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X86-NEXT:    movd %xmm0, (%eax)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: convert_v5i16_to_v5i8:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movdqa (%rsi), %xmm0
+; X64-NEXT:    paddw {{.*}}(%rip), %xmm0
+; X64-NEXT:    pextrb $8, %xmm0, 4(%rdi)
+; X64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; X64-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X64-NEXT:    movd %xmm0, (%rdi)
+; X64-NEXT:    retq
+entry:
+	%load = load <5 x i16>, <5 x i16>* %src.addr
+	%val = trunc <5 x i16> %load to <5 x i8>
+	%add = add <5 x i8> %val, < i8 1, i8 1, i8 1, i8 1, i8 1 >
+	store <5 x i8> %add, <5 x i8>* %dst.addr
+	ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-2.ll?rev=263640&r1=263639&r2=263640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-2.ll Wed Mar 16 10:33:43 2016
@@ -1,11 +1,26 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: psllq $48, %xmm0
-; CHECK: psrad $16, %xmm0
-; CHECK: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
 
 ; sign extension v2i16 to v2i32
 
-define void @convert(<2 x i32>* %dst.addr, <2 x i16> %src) nounwind {
+define void @convert_v2i16_v2i32(<2 x i32>* %dst.addr, <2 x i16> %src) nounwind {
+; X86-LABEL: convert_v2i16_v2i32:
+; X86:       # BB#0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    psllq $48, %xmm0
+; X86-NEXT:    psrad $16, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-NEXT:    movq %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: convert_v2i16_v2i32:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    psllq $48, %xmm0
+; X64-NEXT:    psrad $16, %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X64-NEXT:    movq %xmm0, (%rdi)
+; X64-NEXT:    retq
 entry:
 	%signext = sext <2 x i16> %src to <2 x i32>		; <<12 x i8>> [#uses=1]
 	store <2 x i32> %signext, <2 x i32>* %dst.addr

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-3.ll?rev=263640&r1=263639&r2=263640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll Wed Mar 16 10:33:43 2016
@@ -1,11 +1,110 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: cvtdq2ps
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE42
 
 ; sign to float v2i16 to v2f32
 
-define void @convert(<2 x float>* %dst.addr, <2 x i16> %src) nounwind {
+define void @convert_v2i16_to_v2f32(<2 x float>* %dst.addr, <2 x i16> %src) nounwind {
+; X86-SSE2-LABEL: convert_v2i16_to_v2f32:
+; X86-SSE2:       # BB#0: # %entry
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    psllq $48, %xmm0
+; X86-SSE2-NEXT:    psrad $16, %xmm0
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, (%eax)
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE42-LABEL: convert_v2i16_to_v2f32:
+; X86-SSE42:       # BB#0: # %entry
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE42-NEXT:    psllq $48, %xmm0
+; X86-SSE42-NEXT:    psrad $16, %xmm0
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE42-NEXT:    extractps $1, %xmm0, 4(%eax)
+; X86-SSE42-NEXT:    movss %xmm0, (%eax)
+; X86-SSE42-NEXT:    retl
+;
+; X64-LABEL: convert_v2i16_to_v2f32:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    psllq $48, %xmm0
+; X64-NEXT:    psrad $16, %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-NEXT:    movlps %xmm0, (%rdi)
+; X64-NEXT:    retq
 entry:
 	%val = sitofp <2 x i16> %src to <2 x float>
 	store <2 x float> %val, <2 x float>* %dst.addr
 	ret void
 }
+
+; sign to float v3i8 to v3f32
+
+define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) nounwind {
+; X86-SSE2-LABEL: convert_v3i8_to_v3f32:
+; X86-SSE2:       # BB#0: # %entry
+; X86-SSE2-NEXT:    pushl %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X86-SSE2-NEXT:    psrad $24, %xmm0
+; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, (%eax)
+; X86-SSE2-NEXT:    movaps %xmm0, %xmm1
+; X86-SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X86-SSE2-NEXT:    movss %xmm1, 8(%eax)
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
+; X86-SSE2-NEXT:    popl %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE42-LABEL: convert_v3i8_to_v3f32:
+; X86-SSE42:       # BB#0: # %entry
+; X86-SSE42-NEXT:    pushl %eax
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE42-NEXT:    movl (%ecx), %ecx
+; X86-SSE42-NEXT:    movl %ecx, (%esp)
+; X86-SSE42-NEXT:    pmovsxbd (%esp), %xmm0
+; X86-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE42-NEXT:    extractps $2, %xmm0, 8(%eax)
+; X86-SSE42-NEXT:    extractps $1, %xmm0, 4(%eax)
+; X86-SSE42-NEXT:    movss %xmm0, (%eax)
+; X86-SSE42-NEXT:    popl %eax
+; X86-SSE42-NEXT:    retl
+;
+; X64-SSE2-LABEL: convert_v3i8_to_v3f32:
+; X64-SSE2:       # BB#0: # %entry
+; X64-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X64-SSE2-NEXT:    psrad $24, %xmm0
+; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE2-NEXT:    movlps %xmm0, (%rdi)
+; X64-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-SSE2-NEXT:    movss %xmm0, 8(%rdi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE42-LABEL: convert_v3i8_to_v3f32:
+; X64-SSE42:       # BB#0: # %entry
+; X64-SSE42-NEXT:    movl (%rsi), %eax
+; X64-SSE42-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-SSE42-NEXT:    pmovsxbd -{{[0-9]+}}(%rsp), %xmm0
+; X64-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE42-NEXT:    extractps $2, %xmm0, 8(%rdi)
+; X64-SSE42-NEXT:    movlps %xmm0, (%rdi)
+; X64-SSE42-NEXT:    retq
+entry:
+	%load = load <3 x i8>, <3 x i8>* %src.addr
+	%cvt = sitofp <3 x i8> %load to <3 x float>
+	store <3 x float> %cvt, <3 x float>* %dst.addr
+	ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-4.ll?rev=263640&r1=263639&r2=263640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll Wed Mar 16 10:33:43 2016
@@ -1,11 +1,132 @@
-; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s
-; CHECK-NOT: cvtsi2ss
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE42
 
 ; unsigned to float v7i16 to v7f32
 
-define void @convert(<7 x float>* %dst.addr, <7 x i16> %src) nounwind {
+define void @convert_v7i16_v7f32(<7 x float>* %dst.addr, <7 x i16> %src) nounwind {
+; X86-SSE2-LABEL: convert_v7i16_v7f32:
+; X86-SSE2:       # BB#0: # %entry
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X86-SSE2-NEXT:    cvtdq2ps %xmm2, %xmm2
+; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE2-NEXT:    movaps %xmm0, (%eax)
+; X86-SSE2-NEXT:    movss %xmm2, 16(%eax)
+; X86-SSE2-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X86-SSE2-NEXT:    movss %xmm0, 24(%eax)
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X86-SSE2-NEXT:    movss %xmm2, 20(%eax)
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE42-LABEL: convert_v7i16_v7f32:
+; X86-SSE42:       # BB#0: # %entry
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE42-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE42-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X86-SSE42-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE42-NEXT:    cvtdq2ps %xmm2, %xmm1
+; X86-SSE42-NEXT:    extractps $2, %xmm0, 24(%eax)
+; X86-SSE42-NEXT:    extractps $1, %xmm0, 20(%eax)
+; X86-SSE42-NEXT:    movaps %xmm1, (%eax)
+; X86-SSE42-NEXT:    movss %xmm0, 16(%eax)
+; X86-SSE42-NEXT:    retl
+;
+; X64-SSE2-LABEL: convert_v7i16_v7f32:
+; X64-SSE2:       # BB#0: # %entry
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-SSE2-NEXT:    cvtdq2ps %xmm2, %xmm2
+; X64-SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE2-NEXT:    movlps %xmm0, 16(%rdi)
+; X64-SSE2-NEXT:    movaps %xmm2, (%rdi)
+; X64-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-SSE2-NEXT:    movss %xmm0, 24(%rdi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE42-LABEL: convert_v7i16_v7f32:
+; X64-SSE42:       # BB#0: # %entry
+; X64-SSE42-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE42-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X64-SSE42-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE42-NEXT:    cvtdq2ps %xmm2, %xmm1
+; X64-SSE42-NEXT:    extractps $2, %xmm0, 24(%rdi)
+; X64-SSE42-NEXT:    movlps %xmm0, 16(%rdi)
+; X64-SSE42-NEXT:    movaps %xmm1, (%rdi)
+; X64-SSE42-NEXT:    retq
 entry:
-	%val = sitofp <7 x i16> %src to <7 x float>
+	%val = uitofp <7 x i16> %src to <7 x float>
 	store <7 x float> %val, <7 x float>* %dst.addr
 	ret void
 }
+
+; unsigned to float v3i8 to v3f32
+
+define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) nounwind {
+; X86-SSE2-LABEL: convert_v3i8_to_v3f32:
+; X86-SSE2:       # BB#0: # %entry
+; X86-SSE2-NEXT:    pushl %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, (%eax)
+; X86-SSE2-NEXT:    movaps %xmm0, %xmm1
+; X86-SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X86-SSE2-NEXT:    movss %xmm1, 8(%eax)
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
+; X86-SSE2-NEXT:    popl %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE42-LABEL: convert_v3i8_to_v3f32:
+; X86-SSE42:       # BB#0: # %entry
+; X86-SSE42-NEXT:    pushl %eax
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X86-SSE42-NEXT:    extractps $2, %xmm0, 8(%eax)
+; X86-SSE42-NEXT:    extractps $1, %xmm0, 4(%eax)
+; X86-SSE42-NEXT:    movss %xmm0, (%eax)
+; X86-SSE42-NEXT:    popl %eax
+; X86-SSE42-NEXT:    retl
+;
+; X64-SSE2-LABEL: convert_v3i8_to_v3f32:
+; X64-SSE2:       # BB#0: # %entry
+; X64-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE2-NEXT:    movlps %xmm0, (%rdi)
+; X64-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-SSE2-NEXT:    movss %xmm0, 8(%rdi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE42-LABEL: convert_v3i8_to_v3f32:
+; X64-SSE42:       # BB#0: # %entry
+; X64-SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X64-SSE42-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-SSE42-NEXT:    extractps $2, %xmm0, 8(%rdi)
+; X64-SSE42-NEXT:    movlps %xmm0, (%rdi)
+; X64-SSE42-NEXT:    retq
+entry:
+	%load = load <3 x i8>, <3 x i8>* %src.addr
+	%cvt = uitofp <3 x i8> %load to <3 x float>
+	store <3 x float> %cvt, <3 x float>* %dst.addr
+	ret void
+}



