[llvm] r348085 - [X86] Custom type legalize v2i32/v4i16/v8i8->i64 bitcasts in 64-bit mode similar to what's done when the destination is f64.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 1 21:46:48 PST 2018
Author: ctopper
Date: Sat Dec 1 21:46:48 2018
New Revision: 348085
URL: http://llvm.org/viewvc/llvm-project?rev=348085&view=rev
Log:
[X86] Custom type legalize v2i32/v4i16/v8i8->i64 bitcasts in 64-bit mode similar to what's done when the destination is f64.
The generic legalizer will fall back to a stack spill that uses a truncating store. That store will get expanded into a shuffle and a non-truncating store on pre-AVX512 targets. Once that happens, the stack store/load pair will be combined away, leaving behind the shuffle and bitcasts. On AVX512 targets the truncating store is legal, so it doesn't get folded away.
By custom legalizing it, we can avoid this churn and maybe produce better code.
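For reference, the trunc2i64_i64 test in vector-trunc.ll (updated below) is one pattern that now takes this path. A minimal sketch of that IR, reconstructed from the diff context (the definition line is truncated in the hunk header, so the exact attributes are omitted here):

    define i64 @trunc2i64_i64(<2 x i64> %inval) {
    entry:
      %0 = trunc <2 x i64> %inval to <2 x i32>
      ; This <2 x i32> -> i64 bitcast is what is now custom legalized in 64-bit mode.
      %1 = bitcast <2 x i32> %0 to i64
      ret i64 %1
    }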
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_insert-7.ll
llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
llvm/trunk/test/CodeGen/X86/vector-trunc.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348085&r1=348084&r2=348085&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 1 21:46:48 2018
@@ -25221,7 +25221,7 @@ static SDValue LowerBITCAST(SDValue Op,
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (DstVT != MVT::f64)
+ if (DstVT != MVT::f64 && DstVT != MVT::i64)
// This conversion needs to be expanded.
return SDValue();
@@ -25253,8 +25253,9 @@ static SDValue LowerBITCAST(SDValue Op,
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
- SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+ MVT V2X64VT = MVT::getVectorVT(DstVT, 2);
+ SDValue ToV2X64 = DAG.getBitcast(V2X64VT, BV);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, ToV2X64,
DAG.getIntPtrConstant(0, dl));
}
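Putting the two hunks together, the patched region of LowerBITCAST reads roughly as follows. This is only a sketch: the code between the hunks, which sets up SVT, NumElts and Elts, is elided.

    if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
        SrcVT == MVT::i64) {
      assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
      if (DstVT != MVT::f64 && DstVT != MVT::i64)
        // This conversion needs to be expanded.
        return SDValue();

      // ... unchanged code that populates SVT, NumElts and Elts ...

      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
      SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
      // DstVT is f64 or i64 here, so this bitcasts the widened build_vector to
      // v2f64 or v2i64 and extracts element 0 as the scalar result.
      MVT V2X64VT = MVT::getVectorVT(DstVT, 2);
      SDValue ToV2X64 = DAG.getBitcast(V2X64VT, BV);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, ToV2X64,
                         DAG.getIntPtrConstant(0, dl));
    }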
Modified: llvm/trunk/test/CodeGen/X86/vec_insert-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-7.ll?rev=348085&r1=348084&r2=348085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-7.ll Sat Dec 1 21:46:48 2018
@@ -19,7 +19,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) no
; X64-LABEL: mmx_movzl:
; X64: ## %bb.0:
; X64-NEXT: movl $32, %eax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: retq
%tmp = bitcast x86_mmx %x to <2 x i32>
%tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
Modified: llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll?rev=348085&r1=348084&r2=348085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll Sat Dec 1 21:46:48 2018
@@ -13,10 +13,8 @@ define x86_mmx @t0(i32 %A) nounwind {
;
; X64-LABEL: t0:
; X64: ## %bb.0:
-; X64-NEXT: ## kill: def $edi killed $edi def $rdi
-; X64-NEXT: movq %rdi, %xmm0
-; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; X64-NEXT: retq
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
%tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=348085&r1=348084&r2=348085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Sat Dec 1 21:46:48 2018
@@ -22,224 +22,63 @@ define float @cvt_i16_to_f32(i16 %a0) no
}
define <4 x float> @cvt_4i16_to_4f32(<4 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_4i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_4i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_4i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_4i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
%1 = bitcast <4 x i16> %a0 to <4 x half>
%2 = fpext <4 x half> %1 to <4 x float>
ret <4 x float> %2
}
define <4 x float> @cvt_8i16_to_4f32(<8 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_8i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_8i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_8i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = bitcast <4 x i16> %1 to <4 x half>
%3 = fpext <4 x half> %2 to <4 x float>
@@ -730,111 +569,31 @@ define <4 x float> @load_cvt_4i16_to_4f3
}
define <4 x float> @load_cvt_8i16_to_4f32(<8 x i16>* %a0) nounwind {
-; AVX1-LABEL: load_cvt_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movq (%rdi), %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movq (%rdi), %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: load_cvt_8i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movq (%rdi), %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: load_cvt_8i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: load_cvt_8i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: movq (%rdi), %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <4 x i16> %2 to <4 x half>
@@ -1261,125 +1020,35 @@ define <2 x double> @cvt_2i16_to_2f64(<2
}
define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_4i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_4i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_4i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_4i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
%1 = bitcast <4 x i16> %a0 to <4 x half>
%2 = fpext <4 x half> %1 to <4 x double>
ret <4 x double> %2
@@ -1454,123 +1123,34 @@ define <2 x double> @cvt_8i16_to_2f64(<8
}
define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_8i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_8i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_8i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_8i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = bitcast <4 x i16> %1 to <4 x half>
%3 = fpext <4 x half> %2 to <4 x double>
@@ -1812,123 +1392,34 @@ define <4 x double> @load_cvt_4i16_to_4f
}
define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind {
-; AVX1-LABEL: load_cvt_8i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movq (%rdi), %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_8i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movq (%rdi), %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: load_cvt_8i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movq (%rdi), %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: load_cvt_8i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: load_cvt_8i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: movq (%rdi), %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <4 x i16> %2 to <4 x half>
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=348085&r1=348084&r2=348085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Sat Dec 1 21:46:48 2018
@@ -1639,29 +1639,11 @@ define i64 @trunc2i64_i64(<2 x i64> %inv
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
-; AVX512F-LABEL: trunc2i64_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc2i64_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc2i64_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc2i64_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc2i64_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
entry:
%0 = trunc <2 x i64> %inval to <2 x i32>
%1 = bitcast <2 x i32> %0 to i64
@@ -1746,29 +1728,11 @@ define i64 @trunc4i32_i64(<4 x i32> %inv
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
-; AVX512F-LABEL: trunc4i32_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc4i32_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc4i32_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc4i32_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc4i32_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
entry:
%0 = trunc <4 x i32> %inval to <4 x i16>
%1 = bitcast <4 x i16> %0 to i64
@@ -1849,29 +1813,11 @@ define i64 @trunc8i16_i64(<8 x i16> %inv
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
-; AVX512F-LABEL: trunc8i16_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc8i16_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vmovq %xmm0, %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc8i16_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc8i16_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovwb %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc8i16_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
entry:
%0 = trunc <8 x i16> %inval to <8 x i8>
%1 = bitcast <8 x i8> %0 to i64