[llvm] r277416 - [AVX-512] Correct ExeDomain for many AVX-512 instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 1 22:11:15 PDT 2016
Author: ctopper
Date: Tue Aug 2 00:11:15 2016
New Revision: 277416
URL: http://llvm.org/viewvc/llvm-project?rev=277416&view=rev
Log:
[AVX-512] Correct ExeDomain for many AVX-512 instructions.

Many of the AVX-512 multiclasses never set ExeDomain, and instruction
classes derived from AVX512AIi8Base had it hard-coded to SSEPackedInt,
so floating point instructions such as VGETMANTSD, the VFIXUPIMM family,
and VPERMILPS/VPERMILPD ended up in the packed integer domain. Set the
domain from the X86VectorVTInfo of each instantiation (let ExeDomain =
_.ExeDomain) so that integer instantiations keep the integer domain and
floating point instantiations get the single/double domain, give
VPERMILPS/VPERMILPD explicit domains, and remove the hard-coded
SSEPackedInt from AVX512AIi8Base so it no longer overrides the
per-multiclass setting. As the updated tests show, register moves and
zeroing idioms around the floating point forms now stay in the FP domain
(vmovaps/vmovapd, vxorps/vxorpd) instead of the integer domain
(vmovdqa64, vpxor).
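The fix applies one idiom throughout X86InstrAVX512.td: wrap each
multiclass's instruction definitions in "let ExeDomain = _.ExeDomain"
so every instantiation inherits the domain of its X86VectorVTInfo. A
minimal sketch of the idiom follows; the multiclass name, opcode, and
operands are illustrative, not taken from the patch:

  multiclass example_fp_unary<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86VectorVTInfo _> {
    // _.ExeDomain is SSEPackedSingle for f32 element types,
    // SSEPackedDouble for f64, and SSEPackedInt otherwise, so the
    // execution domain now matches the element type of each
    // instantiation instead of defaulting to packed integer.
    let ExeDomain = _.ExeDomain in
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1), OpcodeStr,
                              "$src1", "$src1",
                              (_.VT (OpNode _.RC:$src1))>;
  }

With the correct domain on the instruction itself, the execution domain
fixing pass can choose the matching floating point move and xor forms,
as the test updates below show.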
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFormats.td
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Aug 2 00:11:15 2016
@@ -4224,6 +4224,7 @@ defm VPTESTNM : avx512_vptest_all_forms
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4235,10 +4236,12 @@ multiclass avx512_shift_rmi<bits<8> opc,
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
+ }
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
@@ -4249,6 +4252,7 @@ multiclass avx512_shift_rmbi<bits<8> opc
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4260,6 +4264,7 @@ multiclass avx512_shift_rrm<bits<8> opc,
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
+ }
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4353,6 +4358,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0x
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4365,10 +4371,12 @@ multiclass avx512_var_shift<bits<8> opc,
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
+ }
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -4635,8 +4643,10 @@ multiclass avx512_permil<string OpcodeSt
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
+let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
+let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
@@ -7196,7 +7206,8 @@ defm VEXPANDPD : expand_by_elt_width <0x
// op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
@@ -7216,11 +7227,13 @@ multiclass avx512_unary_fp_packed_imm<bi
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
@@ -7250,7 +7263,8 @@ multiclass avx512_common_unary_fp_sae_pa
// op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7273,13 +7287,14 @@ multiclass avx512_fp_packed_imm<bits<8>
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
-
+ let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7293,6 +7308,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> op
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -7302,6 +7318,7 @@ multiclass avx512_3Op_imm8<bits<8> opc,
X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+ let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -7315,8 +7332,8 @@ multiclass avx512_3Op_imm8<bits<8> opc,
// op(reg_vec2,mem_scalar,imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
-
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7339,11 +7356,13 @@ multiclass avx512_fp_scalar_imm<bits<8>
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
[]>;
}
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -7907,8 +7926,8 @@ defm VPSADBW : avx512_psadbw_packed_all<
HasBWI>, EVEX_4V;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -7953,8 +7972,8 @@ defm VPTERNLOGQ : avx512_common_ternlog<
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -7984,8 +8003,8 @@ multiclass avx512_fixupimm_packed<bits<8
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _>{
-let Constraints = "$src1 = $dst" in {
+ SDNode OpNode, X86VectorVTInfo _>{
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -8000,7 +8019,8 @@ let Constraints = "$src1 = $dst" in {
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
- let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
+ let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
+ ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Tue Aug 2 00:11:15 2016
@@ -785,7 +785,6 @@ class AVX512AIi8<bits<8> o, Format F, da
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
- Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Aug 2 00:11:15 2016
@@ -4956,7 +4956,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -5995,7 +5995,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
@@ -6017,10 +6017,10 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vmovapd %zmm0, %zmm5
; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
@@ -6041,10 +6041,10 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -6065,11 +6065,11 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
@@ -6088,10 +6088,10 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %zmm0, %zmm5
; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
@@ -6111,9 +6111,9 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm4
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
@@ -6135,11 +6135,11 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovapd %zmm0, %zmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
@@ -6159,10 +6159,10 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vmovapd %zmm0, %zmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Tue Aug 2 00:11:15 2016
@@ -6889,7 +6889,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4]
; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
@@ -6911,7 +6911,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
@@ -6931,7 +6931,7 @@ define <4 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
@@ -6953,10 +6953,10 @@ define <4 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xe8]
; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd]
@@ -6976,9 +6976,9 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
@@ -6999,9 +6999,9 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
@@ -7022,9 +7022,9 @@ define <8 x float>@test_int_x86_avx512_m
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
@@ -7045,9 +7045,9 @@ define <8 x float>@test_int_x86_avx512_m
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Tue Aug 2 00:11:15 2016
@@ -2997,12 +2997,12 @@ define <4 x i16> @cvt_4f32_to_4i16(<4 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3109,12 +3109,12 @@ define <8 x i16> @cvt_4f32_to_8i16_undef
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3225,12 +3225,12 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3410,12 +3410,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
@@ -3433,12 +3433,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -3677,7 +3677,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
@@ -3691,12 +3691,12 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
@@ -3720,7 +3720,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r13w
@@ -3731,11 +3731,11 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r14w
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r15w
@@ -3888,11 +3888,11 @@ define void @store_cvt_4f32_to_4i16(<4 x
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
@@ -4009,12 +4009,12 @@ define void @store_cvt_4f32_to_8i16_unde
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -4133,12 +4133,12 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
@@ -4286,11 +4286,11 @@ define void @store_cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r12d
@@ -4300,11 +4300,11 @@ define void @store_cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r13d
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
@@ -4554,11 +4554,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
@@ -4568,11 +4568,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
@@ -4582,11 +4582,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
@@ -4596,11 +4596,11 @@ define void @store_cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
@@ -4671,77 +4671,23 @@ define i16 @cvt_f64_to_i16(double %a0) n
}
define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
-; AVX1-LABEL: cvt_2f64_to_2i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: subq $16, %rsp
-; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movw %ax, %bx
-; AVX1-NEXT: shll $16, %ebx
-; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movzwl %ax, %eax
-; AVX1-NEXT: orl %ebx, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: addq $16, %rsp
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_2f64_to_2i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: subq $16, %rsp
-; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movw %ax, %bx
-; AVX2-NEXT: shll $16, %ebx
-; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movzwl %ax, %eax
-; AVX2-NEXT: orl %ebx, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: addq $16, %rsp
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_2f64_to_2i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $16, %rsp
-; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, %bx
-; AVX512F-NEXT: shll $16, %ebx
-; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movzwl %ax, %eax
-; AVX512F-NEXT: orl %ebx, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: addq $16, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_2f64_to_2i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $16, %rsp
-; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, %bx
-; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movzwl %ax, %eax
-; AVX512VL-NEXT: orl %ebx, %eax
-; AVX512VL-NEXT: vmovd %eax, %xmm0
-; AVX512VL-NEXT: addq $16, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_2f64_to_2i16:
+; ALL: # BB#0:
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: subq $16, %rsp
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
+; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movw %ax, %bx
+; ALL-NEXT: shll $16, %ebx
+; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movzwl %ax, %eax
+; ALL-NEXT: orl %ebx, %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: addq $16, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: retq
%1 = fptrunc <2 x double> %a0 to <2 x half>
%2 = bitcast <2 x half> %1 to <2 x i16>
ret <2 x i16> %2
@@ -4861,7 +4807,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5011,7 +4957,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5165,7 +5111,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5410,7 +5356,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $96, %rsp
-; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
+; AVX512VL-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
@@ -5491,81 +5437,24 @@ define void @store_cvt_f64_to_i16(double
}
define void @store_cvt_2f64_to_2i16(<2 x double> %a0, <2 x i16>* %a1) nounwind {
-; AVX1-LABEL: store_cvt_2f64_to_2i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: subq $24, %rsp
-; AVX1-NEXT: movq %rdi, %rbx
-; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movl %eax, %ebp
-; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movw %ax, (%rbx)
-; AVX1-NEXT: movw %bp, 2(%rbx)
-; AVX1-NEXT: addq $24, %rsp
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: store_cvt_2f64_to_2i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: subq $24, %rsp
-; AVX2-NEXT: movq %rdi, %rbx
-; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movl %eax, %ebp
-; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movw %ax, (%rbx)
-; AVX2-NEXT: movw %bp, 2(%rbx)
-; AVX2-NEXT: addq $24, %rsp
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: store_cvt_2f64_to_2i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $24, %rsp
-; AVX512F-NEXT: movq %rdi, %rbx
-; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movl %eax, %ebp
-; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, (%rbx)
-; AVX512F-NEXT: movw %bp, 2(%rbx)
-; AVX512F-NEXT: addq $24, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: popq %rbp
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: store_cvt_2f64_to_2i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %rbp
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movl %eax, %ebp
-; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, (%rbx)
-; AVX512VL-NEXT: movw %bp, 2(%rbx)
-; AVX512VL-NEXT: addq $24, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: popq %rbp
-; AVX512VL-NEXT: retq
+; ALL-LABEL: store_cvt_2f64_to_2i16:
+; ALL: # BB#0:
+; ALL-NEXT: pushq %rbp
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: subq $24, %rsp
+; ALL-NEXT: movq %rdi, %rbx
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
+; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movl %eax, %ebp
+; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movw %ax, (%rbx)
+; ALL-NEXT: movw %bp, 2(%rbx)
+; ALL-NEXT: addq $24, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: popq %rbp
+; ALL-NEXT: retq
%1 = fptrunc <2 x double> %a0 to <2 x half>
%2 = bitcast <2 x half> %1 to <2 x i16>
store <2 x i16> %2, <2 x i16>* %a1
@@ -5692,7 +5581,7 @@ define void @store_cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $88, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu64 %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
@@ -5855,7 +5744,7 @@ define void @store_cvt_4f64_to_8i16_unde
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp
@@ -6026,7 +5915,7 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp
@@ -6281,7 +6170,7 @@ define void @store_cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $200, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
+; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Tue Aug 2 00:11:15 2016
@@ -389,45 +389,21 @@ define <4 x double> @shuffle_v4f64_1054(
}
define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x double> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Tue Aug 2 00:11:15 2016
@@ -190,7 +190,7 @@ define <16 x i32> @shuffle_v16i32_01_02_
define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
+; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Tue Aug 2 00:11:15 2016
@@ -51,13 +51,13 @@ define <8 x double> @shuffle_v8f64_44444
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000010:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000010:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
@@ -67,13 +67,13 @@ define <8 x double> @shuffle_v8f64_00000
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000200:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000200:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
@@ -83,13 +83,13 @@ define <8 x double> @shuffle_v8f64_00000
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00003000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00003000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
@@ -99,13 +99,13 @@ define <8 x double> @shuffle_v8f64_00003
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00040000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00040000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@@ -115,13 +115,13 @@ define <8 x double> @shuffle_v8f64_00040
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00500000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00500000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -131,13 +131,13 @@ define <8 x double> @shuffle_v8f64_00500
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_06000000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_06000000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -184,13 +184,13 @@ define <8 x double> @shuffle_v8f64_01014
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00112233:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00112233:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
@@ -200,13 +200,13 @@ define <8 x double> @shuffle_v8f64_00112
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00001111:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00001111:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
@@ -624,13 +624,13 @@ define <8 x double> @shuffle_v8f64_00015
;
; AVX512F-LABEL: shuffle_v8f64_00015444:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00015444:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
@@ -641,13 +641,13 @@ define <8 x double> @shuffle_v8f64_00204
;
; AVX512F-LABEL: shuffle_v8f64_00204644:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00204644:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
@@ -658,13 +658,13 @@ define <8 x double> @shuffle_v8f64_03004
;
; AVX512F-LABEL: shuffle_v8f64_03004474:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_03004474:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
@@ -675,13 +675,13 @@ define <8 x double> @shuffle_v8f64_10004
;
; AVX512F-LABEL: shuffle_v8f64_10004444:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10004444:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
@@ -692,13 +692,13 @@ define <8 x double> @shuffle_v8f64_22006
;
; AVX512F-LABEL: shuffle_v8f64_22006446:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22006446:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
@@ -709,13 +709,13 @@ define <8 x double> @shuffle_v8f64_33307
;
; AVX512F-LABEL: shuffle_v8f64_33307474:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_33307474:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
@@ -726,13 +726,13 @@ define <8 x double> @shuffle_v8f64_32104
;
; AVX512F-LABEL: shuffle_v8f64_32104567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_32104567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -743,13 +743,13 @@ define <8 x double> @shuffle_v8f64_00236
;
; AVX512F-LABEL: shuffle_v8f64_00236744:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00236744:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
@@ -760,13 +760,13 @@ define <8 x double> @shuffle_v8f64_00226
;
; AVX512F-LABEL: shuffle_v8f64_00226644:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00226644:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
@@ -837,13 +837,13 @@ define <8 x double> @shuffle_v8f64_002u6
;
; AVX512F-LABEL: shuffle_v8f64_002u6u44:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -854,13 +854,13 @@ define <8 x double> @shuffle_v8f64_00uu6
;
; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
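Every v8f64 hunk above follows the same pattern: the constant-pool load of the floating-point shuffle mask is now selected as vmovapd (FP domain) instead of vmovdqa64 (integer domain), while the vpermpd itself is unchanged. A reduced test in the same style would look like the sketch below; the function name and mask values are illustrative and not part of this commit:

; Illustrative reduced test (not in the commit): with the corrected
; ExeDomain, the mask load stays in the FP domain alongside vpermpd,
; avoiding a domain-crossing bypass delay.
define <8 x double> @shuffle_v8f64_11005544(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_11005544:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm1 = [1,1,0,0,5,5,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4>
  ret <8 x double> %shuffle
}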
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=277416&r1=277415&r2=277416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Tue Aug 2 00:11:15 2016
@@ -29,11 +29,11 @@ define <8 x double> @combine_permvar_8f6
; CHECK-LABEL: combine_permvar_8f64_identity_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m)
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m)
@@ -433,7 +433,7 @@ define <8 x double> @combine_permvar_8f6
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
ret <8 x double> %1
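The combining tests show the same domain correction applied to plain register moves: once the masked vpermpd result lives in the FP domain, the final copy out of %zmm1 is emitted as vmovapd rather than vmovdqa64. As a rough sketch, a minimal masked case would be the variant below, which is essentially the test above with the undef lanes made explicit; the function name is made up, and the intrinsic is the one already used in this file:

; Illustrative only: constant permvar indices fold to the immediate
; vpermpd form, and the masked result is copied out in the FP domain.
declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double> @combine_permvar_8f64_mask_example(<8 x double> %x0, <8 x double> %x1, i8 %m) {
; CHECK-LABEL: combine_permvar_8f64_mask_example:
; CHECK:       # BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 0, i64 7, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
  ret <8 x double> %1
}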