[llvm] r280648 - [AVX-512] Simplify X86InstrInfo::copyPhysReg for 128/256-bit vectors with AVX512, but not VLX. We should use the VEX opcodes and trust the register allocator to not use the extended XMM/YMM register space.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 4 23:43:06 PDT 2016
Author: ctopper
Date: Mon Sep 5 01:43:06 2016
New Revision: 280648
URL: http://llvm.org/viewvc/llvm-project?rev=280648&view=rev
Log:
[AVX-512] Simplify X86InstrInfo::copyPhysReg for 128/256-bit vectors with AVX512, but not VLX. We should use the VEX opcodes and trust the register allocator to not use the extended XMM/YMM register space.
Previously we were widening the copy to use the whole ZMM register. The register allocator shouldn't use XMM16-31 or YMM16-31 in this configuration because the instructions needed to spill them aren't available.
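The X86InstrInfo.cpp hunk below handles the VR128X/VR256X/VR512 register classes directly in copyPhysReg and only picks an EVEX-encoded move when VLX is available (or the copy is genuinely 512-bit); otherwise it falls back to the VEX/SSE opcodes. The standalone sketch below mirrors that selection order. The enum values and the CopyQuery struct are illustrative stand-ins for the real LLVM register classes and subtarget queries (X86::VR128XRegClass.contains(...), Subtarget.hasVLX(), etc.), not the actual API.

```cpp
// Illustrative sketch of the new opcode-selection order in copyPhysReg.
// Names here are hypothetical stand-ins, not LLVM's real types.
#include <cstdio>

enum Opcode { MOVAPSrr, VMOVAPSrr, VMOVAPSYrr,
              VMOVAPSZ128rr, VMOVAPSZ256rr, VMOVAPSZrr };

// Stands in for "which register class contains both DestReg and SrcReg"
// plus the relevant subtarget features.
struct CopyQuery {
  bool isVR128X, isVR256X, isVR512;
  bool hasAVX, hasVLX;
};

Opcode selectVecCopyOpcode(const CopyQuery &Q) {
  if (Q.isVR128X)
    return Q.hasVLX ? VMOVAPSZ128rr : Q.hasAVX ? VMOVAPSrr : MOVAPSrr;
  if (Q.isVR256X)
    return Q.hasVLX ? VMOVAPSZ256rr : VMOVAPSYrr;
  if (Q.isVR512)
    return VMOVAPSZrr;
  return MOVAPSrr; // other cases handled elsewhere in the real code
}

int main() {
  // AVX-512 without VLX: a 256-bit copy now selects the VEX-encoded
  // VMOVAPSYrr instead of widening both operands to ZMM for VMOVAPSZrr.
  CopyQuery Q{false, true, false, /*hasAVX=*/true, /*hasVLX=*/false};
  std::printf("%d\n", selectVecCopyOpcode(Q) == VMOVAPSYrr); // prints 1
}
```

This matches the test updates below: on KNL (AVX-512 without VLX/BWI), copies that previously printed as vmovaps/vmovdqa64 on ZMM registers now print as plain XMM/YMM moves.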
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
llvm/trunk/lib/Target/X86/X86RegisterInfo.h
llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-select.ll
llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/masked_memop.ll
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/vector-sext.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Sep 5 01:43:06 2016
@@ -4499,22 +4499,6 @@ unsigned copyPhysRegOpcode_AVX512(unsign
if (Subtarget.hasBWI())
if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
return Opc;
- if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ128rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ256rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR512RegClass.contains(DestReg, SrcReg))
- return X86::VMOVAPSZrr;
if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
return X86::KMOVWkk;
if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
@@ -4535,6 +4519,7 @@ void X86InstrInfo::copyPhysReg(MachineBa
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasVLX = Subtarget.hasVLX();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -4556,12 +4541,14 @@ void X86InstrInfo::copyPhysReg(MachineBa
}
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else if (X86::VR128XRegClass.contains(DestReg, SrcReg))
+ Opc = HasVLX ? X86::VMOVAPSZ128rr : HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+ else if (X86::VR256XRegClass.contains(DestReg, SrcReg))
+ Opc = HasVLX ? X86::VMOVAPSZ256rr : X86::VMOVAPSYrr;
+ else if (X86::VR512RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSZrr;
else if (HasAVX512)
Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
- else if (X86::VR128RegClass.contains(DestReg, SrcReg))
- Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
- else if (X86::VR256RegClass.contains(DestReg, SrcReg))
- Opc = X86::VMOVAPSYrr;
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Sep 5 01:43:06 2016
@@ -691,13 +691,3 @@ X86RegisterInfo::getPtrSizedFrameRegiste
FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
-
-unsigned llvm::get512BitSuperRegister(unsigned Reg) {
- if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
- return X86::ZMM0 + (Reg - X86::XMM0);
- if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
- return X86::ZMM0 + (Reg - X86::YMM0);
- if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
- return Reg;
- llvm_unreachable("Unexpected SIMD register");
-}
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Mon Sep 5 01:43:06 2016
@@ -137,9 +137,6 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-//get512BitRegister - X86 utility - returns 512-bit super register
-unsigned get512BitSuperRegister(unsigned Reg);
-
} // End llvm namespace
#endif
Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Mon Sep 5 01:43:06 2016
@@ -335,30 +335,21 @@ define <8 x i1> @test7a(<8 x i32>%a, <8
}
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
-; KNL-LABEL: test8:
-; KNL: ## BB#0:
-; KNL-NEXT: testb $1, %dil
-; KNL-NEXT: jne LBB8_2
-; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
-; KNL-NEXT: LBB8_2:
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test8:
-; SKX: ## BB#0:
-; SKX-NEXT: testb $1, %dil
-; SKX-NEXT: jne LBB8_2
-; SKX-NEXT: ## BB#1:
-; SKX-NEXT: vmovaps %xmm1, %xmm0
-; SKX-NEXT: LBB8_2:
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test8:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: testb $1, %dil
+; ALL_X64-NEXT: jne LBB8_2
+; ALL_X64-NEXT: ## BB#1:
+; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
+; ALL_X64-NEXT: LBB8_2:
+; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test8:
; KNL_X32: ## BB#0:
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: jne LBB8_2
; KNL_X32-NEXT: ## BB#1:
-; KNL_X32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
; KNL_X32-NEXT: LBB8_2:
; KNL_X32-NEXT: retl
%res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Mon Sep 5 01:43:06 2016
@@ -163,7 +163,7 @@ define <32 x i16> @zext_32x8mem_to_32x16
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -192,7 +192,7 @@ define <32 x i16> @sext_32x8mem_to_32x16
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -213,7 +213,7 @@ define <32 x i16> @zext_32x8_to_32x16(<3
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8_to_32x16:
@@ -258,7 +258,7 @@ define <32 x i16> @sext_32x8_to_32x16(<3
; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8_to_32x16:
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon Sep 5 01:43:06 2016
@@ -198,7 +198,7 @@ define <4 x float> @test_sqrt_ss(<4 x fl
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -225,7 +225,7 @@ define <2 x double> @test_sqrt_sd(<2 x d
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -2681,7 +2681,7 @@ define <4 x float> @test_mask_add_ss_rn(
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
ret <4 x float> %res
@@ -2693,7 +2693,7 @@ define <4 x float> @test_mask_add_ss_rd(
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
ret <4 x float> %res
@@ -2705,7 +2705,7 @@ define <4 x float> @test_mask_add_ss_ru(
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
ret <4 x float> %res
@@ -2717,7 +2717,7 @@ define <4 x float> @test_mask_add_ss_rz(
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
ret <4 x float> %res
@@ -2729,7 +2729,7 @@ define <4 x float> @test_mask_add_ss_cur
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
@@ -2763,7 +2763,7 @@ define <2 x double> @test_mask_add_sd_rn
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
ret <2 x double> %res
@@ -2775,7 +2775,7 @@ define <2 x double> @test_mask_add_sd_rd
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
ret <2 x double> %res
@@ -2787,7 +2787,7 @@ define <2 x double> @test_mask_add_sd_ru
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
ret <2 x double> %res
@@ -2799,7 +2799,7 @@ define <2 x double> @test_mask_add_sd_rz
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
ret <2 x double> %res
@@ -2811,7 +2811,7 @@ define <2 x double> @test_mask_add_sd_cu
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -2845,7 +2845,7 @@ define <4 x float> @test_mask_max_ss_sae
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
ret <4 x float> %res
@@ -2877,7 +2877,7 @@ define <4 x float> @test_mask_max_ss(<4
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
@@ -2910,7 +2910,7 @@ define <2 x double> @test_mask_max_sd_sa
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
@@ -2942,7 +2942,7 @@ define <2 x double> @test_mask_max_sd(<2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -4191,7 +4191,7 @@ define <4 x float> @test_getexp_ss(<4 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -4218,7 +4218,7 @@ define <2 x double> @test_getexp_sd(<2 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
@@ -4438,7 +4438,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %xmm2, %xmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -4842,7 +4842,7 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
ret <4 x float> %res
@@ -4895,7 +4895,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
ret <2 x double> %res
@@ -5523,10 +5523,10 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -5547,9 +5547,9 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
@@ -5617,9 +5617,9 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %zmm0, %zmm4
+; CHECK-NEXT: vmovapd %xmm0, %xmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
@@ -5641,10 +5641,10 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovapd %zmm0, %zmm5
+; CHECK-NEXT: vmovapd %xmm0, %xmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
@@ -5741,11 +5741,11 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfmadd132sd %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfmadd132sd {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5769,11 +5769,11 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfmadd132ss %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfmadd132ss {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5797,7 +5797,7 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovaps %xmm1, %xmm3
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0
@@ -5816,7 +5816,7 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
@@ -5830,11 +5830,11 @@ define <2 x double>@test_int_x86_avx512_
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5858,11 +5858,11 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5885,7 +5885,7 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%q = load float, float* %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -5912,7 +5912,7 @@ define <4 x float>@test_int_x86_avx512_m
; CHECK: ## BB#0:
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vfmadd213ss (%rdi), %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%q = load float, float* %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Sep 5 01:43:06 2016
@@ -418,7 +418,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB20_2
; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: vmovaps %xmm1, %xmm0
; KNL-NEXT: LBB20_2:
; KNL-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Mon Sep 5 01:43:06 2016
@@ -40,7 +40,7 @@ define float @select02(float %a, float %
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpless %xmm0, %xmm3, %k1
; CHECK-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp oge float %a, %eps
%cond = select i1 %cmp, float %c, float %b
@@ -52,7 +52,7 @@ define double @select03(double %a, doubl
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplesd %xmm0, %xmm3, %k1
; CHECK-NEXT: vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double %c, double %b
Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll Mon Sep 5 01:43:06 2016
@@ -359,7 +359,7 @@ define <64 x i8> @_invec32xi8(<32 x i8>%
; AVX512F-LABEL: _invec32xi8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec32xi8:
@@ -374,7 +374,7 @@ define <32 x i16> @_invec16xi16(<16 x i1
; AVX512F-LABEL: _invec16xi16:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec16xi16:
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Sep 5 01:43:06 2016
@@ -291,7 +291,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test6:
@@ -301,7 +301,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test6:
@@ -336,7 +336,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_64-NEXT: vmovdqa %ymm1, %ymm2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_64-NEXT: retq
@@ -349,7 +349,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_32-NEXT: retl
@@ -851,7 +851,7 @@ define <4 x double> @test16(double* %bas
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test16:
@@ -868,7 +868,7 @@ define <4 x double> @test16(double* %bas
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %ymm2, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test16:
@@ -905,7 +905,7 @@ define <2 x double> @test17(double* %bas
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test17:
@@ -918,7 +918,7 @@ define <2 x double> @test17(double* %bas
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test17:
@@ -1165,7 +1165,7 @@ define <2 x float> @test22(float* %base,
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovaps %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test22:
@@ -1181,7 +1181,7 @@ define <2 x float> @test22(float* %base,
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovaps %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test22:
@@ -1226,7 +1226,7 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23:
@@ -1239,7 +1239,7 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test23:
@@ -1271,7 +1271,7 @@ define <2 x i32> @test24(i32* %base, <2
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
@@ -1283,7 +1283,7 @@ define <2 x i32> @test24(i32* %base, <2
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
@@ -1317,7 +1317,7 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test25:
@@ -1330,7 +1330,7 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test25:
@@ -1364,7 +1364,7 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
@@ -1377,7 +1377,7 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Sep 5 01:43:06 2016
@@ -454,7 +454,7 @@ define <8 x float> @test11a(<8 x i32> %t
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovaps %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11a:
@@ -501,7 +501,7 @@ define <8 x i32> @test11b(<8 x i1> %mask
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11b:
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Mon Sep 5 01:43:06 2016
@@ -360,47 +360,26 @@ define <2 x i64> @mul_v2i64spill(<2 x i6
; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: mul_v2i64spill:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: subq $40, %rsp
-; AVX2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: callq foo
-; AVX2-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX2-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX2-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX2-NEXT: vmovdqa %xmm2, %xmm3
-; AVX2-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: addq $40, %rsp
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: mul_v2i64spill:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: subq $40, %rsp
-; AVX512-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX512-NEXT: callq foo
-; AVX512-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX512-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX512-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX512-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm3
-; AVX512-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX512-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX512-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: addq $40, %rsp
-; AVX512-NEXT: retq
+; AVX-LABEL: mul_v2i64spill:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: callq foo
+; AVX-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
+; AVX-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
+; AVX-NEXT: vpsrlq $32, %xmm2, %xmm1
+; AVX-NEXT: vmovdqa %xmm2, %xmm3
+; AVX-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
+; AVX-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX-NEXT: vpsrlq $32, %xmm4, %xmm2
+; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
entry:
; Use a call to force spills.
call void @foo()
Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Mon Sep 5 01:43:06 2016
@@ -168,7 +168,7 @@ define <32 x i16> @sext_32i8_to_32i16(<3
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: sext_32i8_to_32i16:
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=280648&r1=280647&r2=280648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Mon Sep 5 01:43:06 2016
@@ -145,7 +145,7 @@ define <32 x i16> @zext_32i8_to_32i16(<3
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16: