[llvm] r276393 - [AVX512] Add initial support for the Execution Domain fixing pass to change some EVEX instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 21 22:00:53 PDT 2016
Author: ctopper
Date: Fri Jul 22 00:00:52 2016
New Revision: 276393
URL: http://llvm.org/viewvc/llvm-project?rev=276393&view=rev
Log:
[AVX512] Add initial support for the Execution Domain fixing pass to change some EVEX instructions.
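For context: the execution-domain fixing pass replaces a vector instruction with an equivalent opcode from another domain (packed single, packed double, or packed integer) when that avoids a bypass delay between the floating-point and integer execution domains. This change teaches the pass a first batch of EVEX-encoded opcodes. Because AVX-512 splits the integer logic operations into 32-bit-element (D) and 64-bit-element (Q) forms, the new replacement table carries two integer columns; a small sketch of how such a row is consulted appears after the X86InstrInfo.cpp diff below.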
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
llvm/trunk/test/CodeGen/X86/avx512-arith.ll
llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll
llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-logic.ll
llvm/trunk/test/CodeGen/X86/avx512-mov.ll
llvm/trunk/test/CodeGen/X86/avx512-select.ll
llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/masked_memop.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Jul 22 00:00:52 2016
@@ -7219,7 +7219,12 @@ static const uint16_t ReplaceableInstrs[
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ // AVX512 support
+ { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
+ { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
+ { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -7246,6 +7251,40 @@ static const uint16_t ReplaceableInstrsA
{ X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
};
+static const uint16_t ReplaceableInstrsAVX512[][4] = {
+ // Two integer columns for 64-bit and 32-bit elements.
+ //PackedSingle PackedDouble PackedInt PackedInt
+ { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
+ { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
+ { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
+ { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
+ { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
+ { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
+ { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
+ { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
+ { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
+ { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
+ { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
+ { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
+ { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
+ { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
+ { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
+ { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
+ { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
+ { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
+ { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
+ { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
+ { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
+ { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
+ { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
+ { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
+ { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
+ { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
+ { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
+ { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
+ { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
+};
+
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
@@ -7263,6 +7302,14 @@ static const uint16_t *lookupAVX2(unsign
return nullptr;
}
+static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain) {
+ // If this is the integer domain make sure to check both integer columns.
+ for (const uint16_t (&Row)[4] : ReplaceableInstrsAVX512)
+ if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
+ return Row;
+ return nullptr;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
@@ -7272,6 +7319,8 @@ X86InstrInfo::getExecutionDomain(const M
validDomains = 0xe;
else if (domain && lookupAVX2(MI.getOpcode(), domain))
validDomains = hasAVX2 ? 0xe : 0x6;
+ else if (domain && lookupAVX512(MI.getOpcode(), domain))
+ validDomains = 0xe;
return std::make_pair(domain, validDomains);
}
@@ -7285,6 +7334,12 @@ void X86InstrInfo::setExecutionDomain(Ma
"256-bit vector operations only available in AVX2");
table = lookupAVX2(MI.getOpcode(), dom);
}
+ if (!table) { // try the AVX512 table
+ table = lookupAVX512(MI.getOpcode(), dom);
+ // Don't change integer Q instructions to D instructions.
+ if (dom == 3 && table[3] == MI.getOpcode())
+ Domain = 4;
+ }
assert(table && "Cannot change domain");
MI.setDesc(get(table[Domain - 1]));
}
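The subtle part of setExecutionDomain above is the fourth column: when the requested domain is the integer domain and the instruction is already the 32-bit-element (D) form, the pass bumps Domain to 4 so the D opcode in column four is re-selected, rather than rewriting it into the 64-bit-element (Q) form in column three. A minimal, self-contained sketch of that column selection, using hypothetical opcode values instead of the real X86:: enums:

#include <cstdint>
#include <cstdio>

// One 4-column row: PackedSingle, PackedDouble, integer Q (64-bit elements),
// integer D (32-bit elements). The values here are made up for illustration.
static const uint16_t Row[4] = {/*VXORPSZrr*/ 10, /*VXORPDZrr*/ 11,
                                /*VPXORQZrr*/ 12, /*VPXORDZrr*/ 13};

// Pick the opcode for the requested domain (1 = PS, 2 = PD, 3 = integer).
// If the instruction is already the D form, stay in the fourth column so a
// 32-bit-element instruction is not silently rewritten into the Q form.
static uint16_t pickOpcode(uint16_t CurOpcode, unsigned ReqDomain) {
  unsigned Col = ReqDomain - 1;
  if (ReqDomain == 3 && Row[3] == CurOpcode)
    Col = 3;
  return Row[Col];
}

int main() {
  printf("%u\n", pickOpcode(13, 3)); // 13: the D form stays the D form
  printf("%u\n", pickOpcode(12, 3)); // 12: the Q form stays the Q form
  printf("%u\n", pickOpcode(12, 1)); // 10: integer form switches to the PS form
}

Note that only the logic-op rows carry a distinct D opcode; in the move rows added above (VMOVDQA64/VMOVDQU64) both integer columns hold the same opcode, so the extra column changes nothing for them.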
Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Fri Jul 22 00:00:52 2016
@@ -321,7 +321,7 @@ void X86PassConfig::addPreSched2() { add
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128XRegClass));
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Fri Jul 22 00:00:52 2016
@@ -891,7 +891,7 @@ define <8 x double> @test_mask_broadcast
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
double* %j, <8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
@@ -962,30 +962,10 @@ define <8 x float> @test_fxor_8f32(<8 x
}
define <8 x double> @fabs_v8f64(<8 x double> %p)
-; AVX512F-LABEL: fabs_v8f64:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fabs_v8f64:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: fabs_v8f64:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: fabs_v8f64:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
-; AVX512DQ-NEXT: retq
-;
-; SKX-LABEL: fabs_v8f64:
-; SKX: ## BB#0:
-; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
-; SKX-NEXT: retq
+; CHECK-LABEL: fabs_v8f64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
ret <8 x double> %t
Modified: llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll Fri Jul 22 00:00:52 2016
@@ -10,7 +10,7 @@ define void @bar__512(<16 x i32>* %var)
; CHECK-NEXT: subq $112, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
-; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK-NEXT: vmovdqu64 %zmm0, (%rsp) ## 64-byte Spill
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
; CHECK-NEXT: callq _Print__512
Modified: llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll Fri Jul 22 00:00:52 2016
@@ -13,10 +13,10 @@ define <32 x double> @test_load_32f64(<3
; AVX512BW-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm1
-; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
-; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
+; AVX512BW-NEXT: vmovapd %zmm1, %zmm0
+; AVX512BW-NEXT: vmovapd %zmm2, %zmm1
+; AVX512BW-NEXT: vmovapd %zmm3, %zmm2
+; AVX512BW-NEXT: vmovapd %zmm4, %zmm3
; AVX512BW-NEXT: retq
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
ret <32 x double> %res
@@ -34,10 +34,10 @@ define <32 x i64> @test_load_32i64(<32 x
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm4 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm1
-; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
-; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm1
+; AVX512BW-NEXT: vmovdqa64 %zmm3, %zmm2
+; AVX512BW-NEXT: vmovdqa64 %zmm4, %zmm3
; AVX512BW-NEXT: retq
%res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
ret <32 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Fri Jul 22 00:00:52 2016
@@ -163,7 +163,7 @@ define <32 x i16> @zext_32x8mem_to_32x16
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -192,7 +192,7 @@ define <32 x i16> @sext_32x8mem_to_32x16
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -213,7 +213,7 @@ define <32 x i16> @zext_32x8_to_32x16(<3
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8_to_32x16:
@@ -258,7 +258,7 @@ define <32 x i16> @sext_32x8_to_32x16(<3
; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
-; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8_to_32x16:
Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -129,7 +129,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -146,7 +146,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -163,7 +163,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -229,7 +229,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -358,7 +358,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -485,7 +485,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -502,7 +502,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -519,7 +519,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -679,7 +679,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -696,7 +696,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
@@ -743,7 +743,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Fri Jul 22 00:00:52 2016
@@ -204,7 +204,7 @@ define <16 x i32> @test11(<16 x i32>%a,
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## BB#1: ## %A
-; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq
; KNL-NEXT: LBB10_2: ## %B
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
@@ -219,7 +219,7 @@ define <16 x i32> @test11(<16 x i32>%a,
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## BB#1: ## %A
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
; SKX-NEXT: LBB10_2: ## %B
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Fri Jul 22 00:00:52 2016
@@ -745,7 +745,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
@@ -777,7 +777,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
@@ -809,7 +809,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
@@ -841,7 +841,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
@@ -873,7 +873,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
@@ -905,7 +905,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
@@ -928,7 +928,7 @@ declare void @llvm.x86.avx512.storent.q.
define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_q_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
+; CHECK-NEXT: vmovntps %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
ret void
@@ -939,7 +939,7 @@ declare void @llvm.x86.avx512.storent.pd
define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_pd_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovntpd %zmm0, (%rdi)
+; CHECK-NEXT: vmovntps %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
ret void
@@ -970,7 +970,7 @@ define <16 x i32> @test_mask_xor_epi32(<
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -992,7 +992,7 @@ define <16 x i32> @test_mask_or_epi32(<1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -1014,7 +1014,7 @@ define <16 x i32> @test_mask_and_epi32(<
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -1025,7 +1025,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -1036,7 +1036,7 @@ define <8 x i64> @test_mask_xor_epi64(<8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -1047,7 +1047,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -1058,7 +1058,7 @@ define <8 x i64> @test_mask_or_epi64(<8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -1069,7 +1069,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -1080,7 +1080,7 @@ define <8 x i64> @test_mask_and_epi64(<8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -679,7 +679,7 @@ define <8 x i64> @test_mask_conflict_q(<
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
@@ -713,7 +713,7 @@ define <16 x i32> @test_mask_lzcnt_d(<16
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret <16 x i32> %res
@@ -724,7 +724,7 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
@@ -861,7 +861,7 @@ define <8 x i64> @test_mask_valign_q(<8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
@@ -1342,7 +1342,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1374,7 +1374,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1406,7 +1406,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1438,7 +1438,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1470,7 +1470,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1502,7 +1502,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1534,7 +1534,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1566,7 +1566,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1599,7 +1599,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1631,7 +1631,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1663,7 +1663,7 @@ define <16 x i32> @test_x86_avx512_mask_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1695,7 +1695,7 @@ define <8 x i64> @test_x86_avx512_mask_p
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1960,7 +1960,7 @@ define <16 x i32> @test_mask_add_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -1991,7 +1991,7 @@ define <16 x i32> @test_mask_add_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2026,7 +2026,7 @@ define <16 x i32> @test_mask_add_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -2064,7 +2064,7 @@ define <16 x i32> @test_mask_sub_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -2095,7 +2095,7 @@ define <16 x i32> @test_mask_sub_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2130,7 +2130,7 @@ define <16 x i32> @test_mask_sub_epi32_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -2168,7 +2168,7 @@ define <8 x i64> @test_mask_add_epi64_rr
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2199,7 +2199,7 @@ define <8 x i64> @test_mask_add_epi64_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
@@ -2234,7 +2234,7 @@ define <8 x i64> @test_mask_add_epi64_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2272,7 +2272,7 @@ define <8 x i64> @test_mask_sub_epi64_rr
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2303,7 +2303,7 @@ define <8 x i64> @test_mask_sub_epi64_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
@@ -2338,7 +2338,7 @@ define <8 x i64> @test_mask_sub_epi64_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2376,7 +2376,7 @@ define <8 x i64> @test_mask_mul_epi32_rr
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2407,7 +2407,7 @@ define <8 x i64> @test_mask_mul_epi32_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
@@ -2443,7 +2443,7 @@ define <8 x i64> @test_mask_mul_epi32_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2483,7 +2483,7 @@ define <8 x i64> @test_mask_mul_epu32_rr
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2514,7 +2514,7 @@ define <8 x i64> @test_mask_mul_epu32_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
@@ -2550,7 +2550,7 @@ define <8 x i64> @test_mask_mul_epu32_rm
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2590,7 +2590,7 @@ define <16 x i32> @test_mask_mullo_epi32
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -2621,7 +2621,7 @@ define <16 x i32> @test_mask_mullo_epi32
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2656,7 +2656,7 @@ define <16 x i32> @test_mask_mullo_epi32
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -3285,7 +3285,7 @@ define <2 x double> @test_mask_add_sd_rn
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
ret <2 x double> %res
@@ -3297,7 +3297,7 @@ define <2 x double> @test_mask_add_sd_rd
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
ret <2 x double> %res
@@ -3309,7 +3309,7 @@ define <2 x double> @test_mask_add_sd_ru
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
ret <2 x double> %res
@@ -3321,7 +3321,7 @@ define <2 x double> @test_mask_add_sd_rz
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
ret <2 x double> %res
@@ -3333,7 +3333,7 @@ define <2 x double> @test_mask_add_sd_cu
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -3432,7 +3432,7 @@ define <2 x double> @test_mask_max_sd_sa
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
@@ -3464,7 +3464,7 @@ define <2 x double> @test_mask_max_sd(<2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -3788,7 +3788,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
@@ -3806,7 +3806,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
@@ -3823,7 +3823,7 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
@@ -3840,7 +3840,7 @@ define <8 x i64>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
@@ -3857,7 +3857,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
@@ -3875,7 +3875,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
@@ -3895,7 +3895,7 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
@@ -3913,7 +3913,7 @@ define <8 x i64>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
@@ -3930,7 +3930,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
@@ -4956,7 +4956,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -5197,7 +5197,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
@@ -5214,7 +5214,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
@@ -5231,7 +5231,7 @@ define <8 x i64>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
@@ -5248,7 +5248,7 @@ define <8 x i64>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
@@ -5995,7 +5995,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
@@ -6017,10 +6017,10 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
@@ -6041,10 +6041,10 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -6065,9 +6065,9 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
@@ -6088,10 +6088,10 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
@@ -6111,9 +6111,9 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
@@ -6135,9 +6135,9 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
@@ -6159,10 +6159,10 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/avx512-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-logic.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll Fri Jul 22 00:00:52 2016
@@ -157,7 +157,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a
;
; SKX-LABEL: and_v64i8:
; SKX: ## BB#0:
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = and <64 x i8> %a, %b
ret <64 x i8> %res
@@ -172,7 +172,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %
;
; SKX-LABEL: andn_v64i8:
; SKX: ## BB#0:
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
%b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
@@ -191,7 +191,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a,
;
; SKX-LABEL: or_v64i8:
; SKX: ## BB#0:
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = or <64 x i8> %a, %b
ret <64 x i8> %res
@@ -206,7 +206,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a
;
; SKX-LABEL: xor_v64i8:
; SKX: ## BB#0:
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = xor <64 x i8> %a, %b
ret <64 x i8> %res
@@ -221,7 +221,7 @@ define <32 x i16> @and_v32i16(<32 x i16>
;
; SKX-LABEL: and_v32i16:
; SKX: ## BB#0:
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = and <32 x i16> %a, %b
ret <32 x i16> %res
@@ -236,7 +236,7 @@ define <32 x i16> @andn_v32i16(<32 x i16
;
; SKX-LABEL: andn_v32i16:
; SKX: ## BB#0:
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
%b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -253,7 +253,7 @@ define <32 x i16> @or_v32i16(<32 x i16>
;
; SKX-LABEL: or_v32i16:
; SKX: ## BB#0:
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = or <32 x i16> %a, %b
ret <32 x i16> %res
@@ -268,7 +268,7 @@ define <32 x i16> @xor_v32i16(<32 x i16>
;
; SKX-LABEL: xor_v32i16:
; SKX: ## BB#0:
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%res = xor <32 x i16> %a, %b
ret <32 x i16> %res
Modified: llvm/trunk/test/CodeGen/X86/avx512-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mov.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mov.ll Fri Jul 22 00:00:52 2016
@@ -231,7 +231,7 @@ define <8 x i64> @test23(i8 * %addr) {
define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
store <8 x double>%data, <8 x double>* %vaddr, align 64
@@ -241,7 +241,7 @@ define void @test24(i8 * %addr, <8 x dou
define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
%res = load <8 x double>, <8 x double>* %vaddr, align 64
@@ -271,7 +271,7 @@ define <16 x float> @test27(i8 * %addr)
define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
store <8 x double>%data, <8 x double>* %vaddr, align 1
@@ -281,7 +281,7 @@ define void @test28(i8 * %addr, <8 x dou
define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
%res = load <8 x double>, <8 x double>* %vaddr, align 1
Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Fri Jul 22 00:00:52 2016
@@ -27,7 +27,7 @@ define <8 x i64> @select01(i32 %a, <8 x
; CHECK-NEXT: ## BB#1:
; CHECK-NEXT: vmovaps %zmm0, %zmm1
; CHECK-NEXT: LBB1_2:
-; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
%selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll Fri Jul 22 00:00:52 2016
@@ -359,7 +359,7 @@ define <64 x i8> @_invec32xi8(<32 x i8>%
; AVX512F-LABEL: _invec32xi8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vmovaps %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec32xi8:
@@ -374,7 +374,7 @@ define <32 x i16> @_invec16xi16(<16 x i1
; AVX512F-LABEL: _invec16xi16:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vmovaps %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec16xi16:
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -669,14 +669,14 @@ define <32 x i16> @test_mask_packs_epi32
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -719,7 +719,7 @@ define <32 x i16> @test_mask_packs_epi32
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
@@ -727,7 +727,7 @@ define <32 x i16> @test_mask_packs_epi32
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
@@ -775,7 +775,7 @@ define <32 x i16> @test_mask_packs_epi32
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
@@ -783,7 +783,7 @@ define <32 x i16> @test_mask_packs_epi32
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -833,7 +833,7 @@ define <64 x i8> @test_mask_packs_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
@@ -842,7 +842,7 @@ define <64 x i8> @test_mask_packs_epi16_
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
@@ -887,7 +887,7 @@ define <64 x i8> @test_mask_packs_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rsi, %k1
; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
@@ -897,7 +897,7 @@ define <64 x i8> @test_mask_packs_epi16_
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
@@ -946,14 +946,14 @@ define <32 x i16> @test_mask_packus_epi3
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -996,7 +996,7 @@ define <32 x i16> @test_mask_packus_epi3
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
@@ -1004,7 +1004,7 @@ define <32 x i16> @test_mask_packus_epi3
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
@@ -1052,7 +1052,7 @@ define <32 x i16> @test_mask_packus_epi3
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
@@ -1060,7 +1060,7 @@ define <32 x i16> @test_mask_packus_epi3
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -1110,7 +1110,7 @@ define <64 x i8> @test_mask_packus_epi16
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
@@ -1119,7 +1119,7 @@ define <64 x i8> @test_mask_packus_epi16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
@@ -1164,7 +1164,7 @@ define <64 x i8> @test_mask_packus_epi16
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rsi, %k1
; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
@@ -1174,7 +1174,7 @@ define <64 x i8> @test_mask_packus_epi16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
@@ -1222,14 +1222,14 @@ define <32 x i16> @test_mask_adds_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -1272,7 +1272,7 @@ define <32 x i16> @test_mask_adds_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
@@ -1280,7 +1280,7 @@ define <32 x i16> @test_mask_adds_epi16_
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -1326,14 +1326,14 @@ define <32 x i16> @test_mask_subs_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -1376,7 +1376,7 @@ define <32 x i16> @test_mask_subs_epi16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
@@ -1384,7 +1384,7 @@ define <32 x i16> @test_mask_subs_epi16_
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -1430,14 +1430,14 @@ define <32 x i16> @test_mask_adds_epu16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -1480,7 +1480,7 @@ define <32 x i16> @test_mask_adds_epu16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
@@ -1488,7 +1488,7 @@ define <32 x i16> @test_mask_adds_epu16_
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -1534,14 +1534,14 @@ define <32 x i16> @test_mask_subs_epu16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -1584,7 +1584,7 @@ define <32 x i16> @test_mask_subs_epu16_
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
@@ -1592,7 +1592,7 @@ define <32 x i16> @test_mask_subs_epu16_
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -1825,7 +1825,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
@@ -1834,7 +1834,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
@@ -1851,7 +1851,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
@@ -1860,7 +1860,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
@@ -1877,7 +1877,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
@@ -1886,7 +1886,7 @@ define <32 x i16>@test_int_x86_avx512_ma
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -2269,7 +2269,7 @@ define <32 x i16> @test_mask_add_epi16_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
-; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -2300,7 +2300,7 @@ define <32 x i16> @test_mask_add_epi16_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -2334,7 +2334,7 @@ define <32 x i16> @test_mask_sub_epi16_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
-; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -2365,7 +2365,7 @@ define <32 x i16> @test_mask_sub_epi16_r
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
@@ -2399,7 +2399,7 @@ define <32 x i16> @test_mask_mullo_epi16
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
-; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
@@ -2430,7 +2430,7 @@ define <32 x i16> @test_mask_mullo_epi16
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -15,7 +15,7 @@ define <8 x i64> @test_mask_mullo_epi64_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
-; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
@@ -46,7 +46,7 @@ define <8 x i64> @test_mask_mullo_epi64_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
@@ -81,7 +81,7 @@ define <8 x i64> @test_mask_mullo_epi64_
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -24,11 +24,14 @@ declare <64 x i8> @llvm.x86.avx512.mask.
define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512:
-; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm0
-; CHECK: vpaddb %zmm3, %zmm2, %zmm1
-; CHECK: vpaddb %zmm0, %zmm1, %zmm0
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
@@ -42,15 +45,15 @@ declare <64 x i8> @llvm.x86.avx512.mask.
define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
-; CHECK-NEXT: vpaddb %zmm4, %zmm3, %zmm0
-; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
+; CHECK-NEXT: vpaddb %zmm4, %zmm3, %zmm0
+; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
@@ -64,15 +67,15 @@ declare <64 x i8> @llvm.x86.avx512.mask.
define <64 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
-; CHECK-NEXT: vpaddb %zmm4, %zmm3, %zmm0
-; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
+; CHECK-NEXT: vpaddb %zmm4, %zmm3, %zmm0
+; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
@@ -86,10 +89,10 @@ declare <64 x i8> @llvm.x86.avx512.maskz
define <64 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 {%k1} {z}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
ret <64 x i8> %res
}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Fri Jul 22 00:00:52 2016
@@ -2330,7 +2330,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
+; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
@@ -2360,7 +2360,7 @@ define <2 x i64> @test_mask_andnot_epi64
define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
+; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
@@ -2434,7 +2434,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
+; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
@@ -2464,7 +2464,7 @@ define <4 x i64> @test_mask_andnot_epi64
define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
+; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Fri Jul 22 00:00:52 2016
@@ -857,7 +857,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define void @compr7(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: compr7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
@@ -973,7 +973,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: expand7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll Fri Jul 22 00:00:52 2016
@@ -244,7 +244,7 @@ define <8 x double> @test_8f64_fmsub_loa
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %zmm2
; AVX512-NEXT: vfmsub213pd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: vmovapd %zmm2, %zmm0
; AVX512-NEXT: retq
%x = load <8 x double>, <8 x double>* %a0
%y = fmul <8 x double> %x, %a1
@@ -573,7 +573,7 @@ define <8 x double> @test_v8f64_interp(<
;
; AVX512-LABEL: test_v8f64_interp:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vmovapd %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm3
; AVX512-NEXT: vfmadd213pd %zmm3, %zmm2, %zmm0
; AVX512-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Fri Jul 22 00:00:52 2016
@@ -103,7 +103,7 @@ define <16 x i32> @test3(i32* %base, <16
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test3:
@@ -111,14 +111,14 @@ define <16 x i32> @test3(i32* %base, <16
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test3:
; SKX: # BB#0:
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
@@ -138,7 +138,7 @@ define <16 x i32> @test4(i32* %base, <16
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm2
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_64-NEXT: retq
@@ -149,7 +149,7 @@ define <16 x i32> @test4(i32* %base, <16
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT: retl
@@ -159,7 +159,7 @@ define <16 x i32> @test4(i32* %base, <16
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kmovw %k1, %k2
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
-; SKX-NEXT: vmovaps %zmm1, %zmm2
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm2
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX-NEXT: retq
@@ -246,7 +246,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test6:
@@ -256,7 +256,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test6:
@@ -282,7 +282,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm2
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_64-NEXT: retq
@@ -295,7 +295,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_32-NEXT: retl
@@ -344,7 +344,7 @@ define <16 x i32> @test8(<16 x i32*> %pt
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT: retl
@@ -369,7 +369,7 @@ define <16 x i32> @test8(<16 x i32*> %pt
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm2
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX_32-NEXT: retl
@@ -733,7 +733,7 @@ define <4 x double> @test16(double* %bas
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %zmm2, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test16:
@@ -748,7 +748,7 @@ define <4 x double> @test16(double* %bas
; KNL_32-NEXT: vpsllvq .LCPI15_0, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test16:
@@ -783,7 +783,7 @@ define <2 x double> @test17(double* %bas
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %zmm2, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test17:
@@ -794,7 +794,7 @@ define <2 x double> @test17(double* %bas
; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test17:
@@ -1080,7 +1080,7 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23:
@@ -1091,7 +1091,7 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test23:
@@ -1122,7 +1122,7 @@ define <2 x i32> @test24(i32* %base, <2
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
@@ -1133,7 +1133,7 @@ define <2 x i32> @test24(i32* %base, <2
; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
@@ -1165,7 +1165,7 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test25:
@@ -1176,7 +1176,7 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test25:
@@ -1208,7 +1208,7 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
@@ -1219,7 +1219,7 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
@@ -1546,15 +1546,15 @@ define <16 x float*> @test31(<16 x float
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; KNL_64-NEXT: kshiftrw $8, %k1, %k1
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
-; KNL_64-NEXT: vmovaps %zmm3, %zmm1
+; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test31:
; KNL_32: # BB#0:
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test31:
@@ -1564,15 +1564,15 @@ define <16 x float*> @test31(<16 x float
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
-; SKX-NEXT: vmovaps %zmm2, %zmm0
-; SKX-NEXT: vmovaps %zmm3, %zmm1
+; SKX-NEXT: vmovdqa64 %zmm2, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm3, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test31:
; SKX_32: # BB#0:
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT: retl
%res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
@@ -1598,7 +1598,7 @@ define <16 x i32> @test_gather_16i32(<16
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16i32:
@@ -1619,7 +1619,7 @@ define <16 x i32> @test_gather_16i32(<16
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
-; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: retl
%res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
ret <16 x i32> %res
@@ -1633,8 +1633,8 @@ define <16 x i64> @test_gather_16i64(<16
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
-; KNL_64-NEXT: vmovaps %zmm3, %zmm0
-; KNL_64-NEXT: vmovaps %zmm4, %zmm1
+; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm0
+; KNL_64-NEXT: vmovdqa64 %zmm4, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16i64:
@@ -1657,7 +1657,7 @@ define <16 x i64> @test_gather_16i64(<16
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: retl
@@ -1670,8 +1670,8 @@ define <16 x i64> @test_gather_16i64(<16
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
-; SKX-NEXT: vmovaps %zmm3, %zmm0
-; SKX-NEXT: vmovaps %zmm4, %zmm1
+; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm4, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i64:
@@ -1694,7 +1694,7 @@ define <16 x i64> @test_gather_16i64(<16
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
-; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: retl
@@ -1756,8 +1756,8 @@ define <16 x double> @test_gather_16f64(
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
-; KNL_64-NEXT: vmovaps %zmm3, %zmm0
-; KNL_64-NEXT: vmovaps %zmm4, %zmm1
+; KNL_64-NEXT: vmovapd %zmm3, %zmm0
+; KNL_64-NEXT: vmovapd %zmm4, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16f64:
@@ -1780,7 +1780,7 @@ define <16 x double> @test_gather_16f64(
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: retl
@@ -1793,8 +1793,8 @@ define <16 x double> @test_gather_16f64(
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
-; SKX-NEXT: vmovaps %zmm3, %zmm0
-; SKX-NEXT: vmovaps %zmm4, %zmm1
+; SKX-NEXT: vmovapd %zmm3, %zmm0
+; SKX-NEXT: vmovapd %zmm4, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16f64:
@@ -1817,7 +1817,7 @@ define <16 x double> @test_gather_16f64(
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
-; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: vmovapd %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Fri Jul 22 00:00:52 2016
@@ -200,7 +200,7 @@ define <8 x double> @test5(<8 x i32> %tr
; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovapd %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test5:
@@ -208,7 +208,7 @@ define <8 x double> @test5(<8 x i32> %tr
; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: retq
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
%res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
@@ -501,7 +501,7 @@ define <8 x i32> @test11b(<8 x i1> %mask
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11b:
@@ -1314,7 +1314,7 @@ define void @one_mask_bit_set5(<8 x doub
; AVX512-LABEL: one_mask_bit_set5:
; AVX512: ## BB#0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512-NEXT: vmovlpd %xmm0, 48(%rdi)
+; AVX512-NEXT: vmovlps %xmm0, 48(%rdi)
; AVX512-NEXT: retq
call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
ret void
@@ -1877,8 +1877,8 @@ define <16 x i64> @test_load_16i64(<16 x
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
-; AVX512F-NEXT: vmovaps %zmm2, %zmm1
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm1
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16i64:
@@ -1888,8 +1888,8 @@ define <16 x i64> @test_load_16i64(<16 x
; SKX-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
-; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm2, %zmm1
; SKX-NEXT: retq
%res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
ret <16 x i64> %res
@@ -1981,8 +1981,8 @@ define <16 x double> @test_load_16f64(<1
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
-; AVX512F-NEXT: vmovaps %zmm2, %zmm1
+; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vmovapd %zmm2, %zmm1
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16f64:
@@ -1992,8 +1992,8 @@ define <16 x double> @test_load_16f64(<1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
-; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: vmovapd %zmm1, %zmm0
+; SKX-NEXT: vmovapd %zmm2, %zmm1
; SKX-NEXT: retq
%res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
ret <16 x double> %res
@@ -2204,10 +2204,10 @@ define <32 x double> @test_load_32f64(<3
; AVX512F-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k2, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
-; AVX512F-NEXT: vmovaps %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm3, %zmm2
-; AVX512F-NEXT: vmovaps %zmm4, %zmm3
+; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vmovapd %zmm2, %zmm1
+; AVX512F-NEXT: vmovapd %zmm3, %zmm2
+; AVX512F-NEXT: vmovapd %zmm4, %zmm3
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_32f64:
@@ -2221,10 +2221,10 @@ define <32 x double> @test_load_32f64(<3
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: kshiftrw $8, %k2, %k1
; SKX-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
-; SKX-NEXT: vmovaps %zmm2, %zmm1
-; SKX-NEXT: vmovaps %zmm3, %zmm2
-; SKX-NEXT: vmovaps %zmm4, %zmm3
+; SKX-NEXT: vmovapd %zmm1, %zmm0
+; SKX-NEXT: vmovapd %zmm2, %zmm1
+; SKX-NEXT: vmovapd %zmm3, %zmm2
+; SKX-NEXT: vmovapd %zmm4, %zmm3
; SKX-NEXT: retq
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
ret <32 x double> %res
@@ -5538,7 +5538,7 @@ define <64 x i8> @test_mask_load_64xi8(<
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k1
; SKX-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
%res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
ret <64 x i8> %res
@@ -6912,7 +6912,7 @@ define <32 x i16> @test_mask_load_32xi16
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
%res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
ret <32 x i16> %res
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Fri Jul 22 00:00:52 2016
@@ -83,13 +83,13 @@ define <8 x double> @merge_8f64_4f64_z2(
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL: # BB#0:
-; ALL-NEXT: vmovupd 16(%rdi), %zmm0
+; ALL-NEXT: vmovups 16(%rdi), %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 16(%eax), %zmm0
+; X32-AVX512F-NEXT: vmovups 16(%eax), %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 2
%ptr1 = getelementptr inbounds double, double* %ptr, i64 3
@@ -138,7 +138,7 @@ define <8 x double> @merge_8f64_f64_12zz
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL: # BB#0:
-; ALL-NEXT: vmovupd 8(%rdi), %zmm0
+; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
@@ -147,7 +147,7 @@ define <8 x double> @merge_8f64_f64_1u3u
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0
+; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
@@ -334,7 +334,7 @@ define <16 x float> @merge_16f32_f32_0uu
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rdi), %zmm0
+; ALL-NEXT: vmovdqu64 (%rdi), %zmm0
; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
@@ -343,7 +343,7 @@ define <16 x float> @merge_16f32_f32_0uu
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
+; X32-AVX512F-NEXT: vmovdqu64 (%eax), %zmm0
; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Fri Jul 22 00:00:52 2016
@@ -117,7 +117,7 @@ define void @test_zero_v4f32(<4 x float>
; VLX-LABEL: test_zero_v4f32:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
ret void
@@ -139,7 +139,7 @@ define void @test_zero_v4i32(<4 x i32>*
; VLX-LABEL: test_zero_v4i32:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
@@ -162,7 +162,7 @@ define void @test_zero_v2f64(<2 x double
; VLX-LABEL: test_zero_v2f64:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
ret void
@@ -184,7 +184,7 @@ define void @test_zero_v2i64(<2 x i64>*
; VLX-LABEL: test_zero_v2i64:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
ret void
@@ -206,7 +206,7 @@ define void @test_zero_v8i16(<8 x i16>*
; VLX-LABEL: test_zero_v8i16:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
ret void
@@ -228,7 +228,7 @@ define void @test_zero_v16i8(<16 x i8>*
; VLX-LABEL: test_zero_v16i8:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
ret void
@@ -657,7 +657,7 @@ define void @test_arg_v4i32(<4 x i32> %a
;
; VLX-LABEL: test_arg_v4i32:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1
ret void
@@ -676,7 +676,7 @@ define void @test_arg_v2f64(<2 x double>
;
; VLX-LABEL: test_arg_v2f64:
; VLX: # BB#0:
-; VLX-NEXT: vmovntpd %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1
ret void
@@ -695,7 +695,7 @@ define void @test_arg_v2i64(<2 x i64> %a
;
; VLX-LABEL: test_arg_v2i64:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1
ret void
@@ -714,7 +714,7 @@ define void @test_arg_v8i16(<8 x i16> %a
;
; VLX-LABEL: test_arg_v8i16:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1
ret void
@@ -733,7 +733,7 @@ define void @test_arg_v16i8(<16 x i8> %a
;
; VLX-LABEL: test_arg_v16i8:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1
ret void
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Fri Jul 22 00:00:52 2016
@@ -1536,7 +1536,7 @@ define <8 x double> @test_unaligned_v8f6
;
; AVX512-LABEL: test_unaligned_v8f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovupd (%rdi), %zmm0
+; AVX512-NEXT: vmovups (%rdi), %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %src, align 1, !nontemporal !1
ret <8 x double> %1
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Fri Jul 22 00:00:52 2016
@@ -391,7 +391,7 @@ define <2 x i64> @mul_v2i64spill(<2 x i6
; AVX512-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
; AVX512-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
; AVX512-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm3
; AVX512-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX512-NEXT: vpsrlq $32, %xmm4, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll Fri Jul 22 00:00:52 2016
@@ -75,7 +75,7 @@ define x86_fp80 @s32_to_x(i32 %a) nounwi
; CHECK-LABEL: u64_to_f
; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
; AVX512_64: vcvtusi2ssq
@@ -111,7 +111,7 @@ define float @s64_to_f(i64 %a) nounwind
; AVX512_32: vmovd %eax, %xmm0
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: vmovq %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll {{[0-9]+}}(%esp)
define float @s64_to_f_2(i64 %a) nounwind {
@@ -151,7 +151,7 @@ define double @s64_to_d(i64 %a) nounwind
; AVX512_32: vmovd %eax, %xmm0
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: vmovq %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
define double @s64_to_d_2(i64 %a) nounwind {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll Fri Jul 22 00:00:52 2016
@@ -76,7 +76,7 @@ declare <4 x float> @llvm.x86.sse.add.ss
define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_andpd
- ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -89,7 +89,7 @@ define <2 x double> @stack_fold_andpd(<2
define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_andpd_ymm
- ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -198,7 +198,7 @@ declare <4 x float> @llvm.x86.sse.mul.ss
define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_orpd
- ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -211,7 +211,7 @@ define <2 x double> @stack_fold_orpd(<2
define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_orpd_ymm
- ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -316,7 +316,7 @@ declare <4 x float> @llvm.x86.sse.sub.ss
define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
- ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -329,7 +329,7 @@ define <2 x double> @stack_fold_xorpd(<2
define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd_ymm
- ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Fri Jul 22 00:00:52 2016
@@ -3001,7 +3001,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512-NEXT: .cfi_offset %r14, -24
; AVX512-NEXT: .Ltmp24:
; AVX512-NEXT: .cfi_offset %r15, -16
-; AVX512-NEXT: vmovups %zmm0, (%rsp) # 64-byte Spill
+; AVX512-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movw %ax, %bx
@@ -3011,9 +3011,9 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r15d
; AVX512-NEXT: orl %ebx, %r15d
-; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movw %ax, %bx
@@ -3024,7 +3024,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512-NEXT: orl %ebx, %r14d
; AVX512-NEXT: shlq $32, %r14
; AVX512-NEXT: orq %r15, %r14
-; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -3862,17 +3862,17 @@ define void @store_cvt_8f64_to_8i16(<8 x
; AVX512-NEXT: .Ltmp67:
; AVX512-NEXT: .cfi_offset %rbp, -16
; AVX512-NEXT: movq %rdi, %rbx
-; AVX512-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
+; AVX512-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Fri Jul 22 00:00:52 2016
@@ -262,7 +262,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rsi), %zmm0
+; ALL-NEXT: vmovupd (%rsi), %zmm0
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: retq
%ptr_a = bitcast float* %a to <16 x float>*
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Fri Jul 22 00:00:52 2016
@@ -268,14 +268,14 @@ define <8 x double> @shuffle_v8f64_8823c
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -287,14 +287,14 @@ define <8 x double> @shuffle_v8f64_9832d
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x double> %shuffle
@@ -306,14 +306,14 @@ define <8 x double> @shuffle_v8f64_9810d
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x double> %shuffle
@@ -376,14 +376,14 @@ define <8 x double> @shuffle_v8f64_08991
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x double> %shuffle
@@ -412,14 +412,14 @@ define <8 x double> @shuffle_v8f64_09ab1
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x double> %shuffle
@@ -933,14 +933,14 @@ define <8 x double> @shuffle_v8f64_c348c
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
ret <8 x double> %shuffle
@@ -1191,14 +1191,14 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i64> %shuffle
@@ -1244,14 +1244,14 @@ define <8 x i64> @shuffle_v8i64_8823cc67
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1263,14 +1263,14 @@ define <8 x i64> @shuffle_v8i64_9832dc76
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x i64> %shuffle
@@ -1282,14 +1282,14 @@ define <8 x i64> @shuffle_v8i64_9810dc54
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x i64> %shuffle
@@ -1352,14 +1352,14 @@ define <8 x i64> @shuffle_v8i64_08991abb
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i64> %shuffle
@@ -1388,14 +1388,14 @@ define <8 x i64> @shuffle_v8i64_09ab1def
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i64> %shuffle
@@ -1925,14 +1925,14 @@ define <8 x i64> @shuffle_v8i64_6caa87e5
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i64> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Fri Jul 22 00:00:52 2016
@@ -33,7 +33,7 @@ define <8 x double> @combine_permvar_8f6
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m)
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m)
@@ -56,7 +56,7 @@ define <8 x i64> @combine_permvar_8i64_i
; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; CHECK-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 %m)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 %m)
@@ -168,10 +168,10 @@ define <16 x float> @combine_vpermt2var_
define <16 x float> @combine_vpermt2var_16f32_vmovddup_load(<16 x float> *%p0, <16 x float> %x1) {
; CHECK-LABEL: combine_vpermt2var_16f32_vmovddup_load:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
; CHECK-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x float>, <16 x float> *%p0
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 12, i32 13>, <16 x float> %x0, <16 x float> %x1, i16 -1)
@@ -191,10 +191,10 @@ define <16 x float> @combine_vpermt2var_
; CHECK-LABEL: combine_vpermt2var_16f32_vmovddup_mask_load:
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
; CHECK-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x float>, <16 x float> *%p0
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 12, i32 13>, <16 x float> %x0, <16 x float> %x1, i16 %m)
@@ -365,7 +365,7 @@ define <64 x i8> @combine_pshufb_identit
; CHECK-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%select = bitcast <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> to <64 x i8>
%mask = bitcast <16 x i32> <i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051> to <64 x i8>
@@ -414,7 +414,7 @@ define <8 x i64> @combine_permvar_8i64_a
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 %m)
ret <8 x i64> %1
@@ -433,7 +433,7 @@ define <8 x double> @combine_permvar_8f6
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
ret <8 x double> %1
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=276393&r1=276392&r2=276393&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Fri Jul 22 00:00:52 2016
@@ -3074,7 +3074,7 @@ define <8 x i16> @trunc_and_v8i64_8i16(<
;
; AVX512-LABEL: trunc_and_v8i64_8i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vandps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = and <8 x i64> %a0, %a1
@@ -3213,8 +3213,8 @@ define <16 x i8> @trunc_and_v16i64_v16i8
;
; AVX512-LABEL: trunc_and_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1
-; AVX512-NEXT: vpandq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vandps %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vandps %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -3445,7 +3445,7 @@ define <8 x i16> @trunc_and_const_v16i64
;
; AVX512-LABEL: trunc_and_const_v16i64_v16i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = and <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -3587,8 +3587,8 @@ define <16 x i8> @trunc_and_const_v16i64
;
; AVX512-LABEL: trunc_and_const_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vandps {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -3812,7 +3812,7 @@ define <8 x i16> @trunc_xor_v8i64_8i16(<
;
; AVX512-LABEL: trunc_xor_v8i64_8i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vxorps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = xor <8 x i64> %a0, %a1
@@ -3951,8 +3951,8 @@ define <16 x i8> @trunc_xor_v16i64_v16i8
;
; AVX512-LABEL: trunc_xor_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxorq %zmm3, %zmm1, %zmm1
-; AVX512-NEXT: vpxorq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vxorps %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vxorps %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -4183,7 +4183,7 @@ define <8 x i16> @trunc_xor_const_v16i64
;
; AVX512-LABEL: trunc_xor_const_v16i64_v16i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = xor <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -4325,8 +4325,8 @@ define <16 x i8> @trunc_xor_const_v16i64
;
; AVX512-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxorq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vxorps {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -4550,7 +4550,7 @@ define <8 x i16> @trunc_or_v8i64_8i16(<8
;
; AVX512-LABEL: trunc_or_v8i64_8i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vorps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = or <8 x i64> %a0, %a1
@@ -4689,8 +4689,8 @@ define <16 x i8> @trunc_or_v16i64_v16i8(
;
; AVX512-LABEL: trunc_or_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vporq %zmm3, %zmm1, %zmm1
-; AVX512-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vorps %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vorps %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -4921,7 +4921,7 @@ define <8 x i16> @trunc_or_const_v16i64_
;
; AVX512-LABEL: trunc_or_const_v16i64_v16i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = or <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -5063,8 +5063,8 @@ define <16 x i8> @trunc_or_const_v16i64_
;
; AVX512-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vporq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vorps {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT: vorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0