[llvm] r345602 - [DAGCombiner] narrow vector binops when extraction is cheap
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 30 07:14:35 PDT 2018
Author: spatel
Date: Tue Oct 30 07:14:34 2018
New Revision: 345602
URL: http://llvm.org/viewvc/llvm-project?rev=345602&view=rev
Log:
[DAGCombiner] narrow vector binops when extraction is cheap
Narrowing vector binops came up in the demanded bits discussion in D52912.
I don't think we're going to be able to do this transform in IR as a canonicalization
because of the risk of creating unsupported widths for vector ops, but we already have
a DAG TLI hook to allow what I was hoping for: isExtractSubvectorCheap(). This is
currently enabled for x86, ARM, and AArch64 (although only x86 has existing regression
test diffs).
The transform is artificially limited so it does not look through bitcasts, because there
are already so many test diffs; that limitation is marked with a TODO and lifting it is a
small follow-up.
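For reference, here is a minimal IR-level sketch of the shape this combine targets
(a hypothetical function, not taken from the test suite):

  define <4 x float> @narrow_binop_demo(<8 x float> %a, <8 x float> %b) {
    ; Wide 256-bit fadd followed by an extract of the low 128-bit half.
    %wide = fadd <8 x float> %a, %b
    %lo = shufflevector <8 x float> %wide, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x float> %lo
  }

In the DAG this is an extract_subvector of a wide binop, and the combine rewrites it as
  extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
so when the target reports the extract as cheap, the wide vaddps %ymm plus an implicit
subregister extract collapses to a single vaddps %xmm (see the avx-vzeroupper.ll diff
below).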
Differential Revision: https://reviews.llvm.org/D53784
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/2012-04-26-sdglue.ll
llvm/trunk/test/CodeGen/X86/avx-logic.ll
llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll
llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
llvm/trunk/test/CodeGen/X86/madd.ll
llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
llvm/trunk/test/CodeGen/X86/sad.ll
llvm/trunk/test/CodeGen/X86/shrink_vmul.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-and.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-or.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-xor.ll
llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll
llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Oct 30 07:14:34 2018
@@ -16673,10 +16673,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS
return SDValue();
}
-/// If we are extracting a subvector produced by a wide binary operator with at
-/// at least one operand that was the result of a vector concatenation, then try
-/// to use the narrow vector operands directly to avoid the concatenation and
-/// extraction.
+/// If we are extracting a subvector produced by a wide binary operator, try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
@@ -16697,22 +16695,43 @@ static SDValue narrowExtractedVectorBinO
if (!WideBVT.isVector())
return SDValue();
+ EVT VT = Extract->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % NumElems == 0 &&
+ "Extract index is not a multiple of the vector length.");
+ EVT SrcVT = Extract->getOperand(0).getValueType();
+ unsigned NumSrcElems = SrcVT.getVectorNumElements();
+ unsigned NarrowingRatio = NumSrcElems / NumElems;
+
// Bail out if the target does not support a narrower version of the binop.
unsigned BOpcode = BinOp.getOpcode();
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
- WideBVT.getVectorNumElements() / 2);
+ WideNumElts / NarrowingRatio);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // TODO: We're not dealing with the bitcasted pattern here. That limitation
+ // should be lifted.
+ if (Extract->getOperand(0) == BinOp && BinOp.hasOneUse() &&
+ TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtractIndex)) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), Extract->getOperand(1));
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), Extract->getOperand(1));
+ return DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
+ BinOp.getNode()->getFlags());
+ }
+
// Only handle the case where we are doubling and then halving. A larger ratio
// may require more than two narrow binops to replace the wide binop.
- EVT VT = Extract->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- unsigned ExtractIndex = ExtractIndexC->getZExtValue();
- assert(ExtractIndex % NumElems == 0 &&
- "Extract index is not a multiple of the vector length.");
- if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ if (NarrowingRatio != 2)
return SDValue();
// TODO: The motivating case for this transform is an x86 AVX1 target. That
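To make the new NarrowingRatio logic concrete, consider a sketch where the source is 4x
wider than the extracted type (hypothetical IR, not from the tests):

  %w = fadd <8 x double> %a, %b
  %hi = shufflevector <8 x double> %w, <8 x double> undef, <2 x i32> <i32 6, i32 7>

Here NumSrcElems / NumElems = 8 / 2 = 4, so the old doubling-then-halving restriction
(now the "NarrowingRatio != 2" bailout) would give up; the new cheap-extract path still
fires, assuming the target reports an index-6 <2 x double> extract as cheap, and produces
a single <2 x double> fadd on two extracted operands. The fsub_noundef_ee diff in
avx512-hadd-hsub.ll below shows this kind of zmm-to-xmm narrowing.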
Modified: llvm/trunk/test/CodeGen/X86/2012-04-26-sdglue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-04-26-sdglue.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-04-26-sdglue.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-04-26-sdglue.ll Tue Oct 30 07:14:34 2018
@@ -7,23 +7,19 @@
define void @func(<4 x float> %a, <16 x i8> %b, <16 x i8> %c, <8 x float> %d, <8 x float> %e, <8 x float>* %f) nounwind ssp {
; CHECK-LABEL: func:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovdqu 0, %xmm3
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
-; CHECK-NEXT: vpalignr {{.*#+}} xmm1 = xmm3[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
-; CHECK-NEXT: vmovdqu 32, %xmm3
-; CHECK-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3]
-; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vmulps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vmovdqu 0, %xmm0
+; CHECK-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmulps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vhaddps %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vhaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-logic.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-logic.ll Tue Oct 30 07:14:34 2018
@@ -338,17 +338,17 @@ define <8 x i32> @and_disguised_i8_elts(
define <8 x i32> @andn_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: andn_disguised_i8_elts:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255]
; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpandn %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: andn_disguised_i8_elts:
@@ -417,17 +417,17 @@ define <8 x i32> @andn_constant_mask_ope
define <8 x i32> @andn_variable_mask_operand_concat(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z, <8 x i32> %w) {
; AVX1-LABEL: andn_variable_mask_operand_concat:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT: vpaddd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
-; AVX1-NEXT: vpandn %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpandn %xmm2, %xmm4, %xmm1
+; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: andn_variable_mask_operand_concat:
Modified: llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll Tue Oct 30 07:14:34 2018
@@ -96,28 +96,24 @@ define <8 x float> @test01(<4 x float> %
define <4 x float> @test02(<8 x float> %a, <8 x float> %b) nounwind {
; VZ-LABEL: test02:
; VZ: # %bb.0:
-; VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; VZ-NEXT: vaddps %xmm1, %xmm0, %xmm0
; VZ-NEXT: vzeroupper
; VZ-NEXT: jmp do_sse # TAILCALL
;
; FAST-ymm-zmm-LABEL: test02:
; FAST-ymm-zmm: # %bb.0:
-; FAST-ymm-zmm-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; FAST-ymm-zmm-NEXT: vaddps %xmm1, %xmm0, %xmm0
; FAST-ymm-zmm-NEXT: jmp do_sse # TAILCALL
;
; BDVER2-LABEL: test02:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vzeroupper
; BDVER2-NEXT: jmp do_sse # TAILCALL
;
; BTVER2-LABEL: test02:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: jmp do_sse # TAILCALL
%add.i = fadd <8 x float> %a, %b
%add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
Modified: llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll Tue Oct 30 07:14:34 2018
@@ -8,7 +8,7 @@ define i32 @hadd_16(<16 x i32> %x225) {
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: retq
;
@@ -17,7 +17,7 @@ define i32 @hadd_16(<16 x i32> %x225) {
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -35,7 +35,7 @@ define i32 @hsub_16(<16 x i32> %x225) {
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: retq
;
@@ -44,7 +44,7 @@ define i32 @hsub_16(<16 x i32> %x225) {
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -62,8 +62,7 @@ define float @fhadd_16(<16 x float> %x22
; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhadd_16:
@@ -71,8 +70,7 @@ define float @fhadd_16(<16 x float> %x22
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -89,8 +87,7 @@ define float @fhsub_16(<16 x float> %x22
; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; KNL-NEXT: vsubps %zmm1, %zmm0, %zmm0
-; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; KNL-NEXT: vsubps %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhsub_16:
@@ -98,8 +95,7 @@ define float @fhsub_16(<16 x float> %x22
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SKX-NEXT: vsubps %zmm1, %zmm0, %zmm0
-; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -180,16 +176,14 @@ define <4 x double> @fadd_noundef_low(<8
; KNL: # %bb.0:
; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; KNL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
-; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; KNL-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: fadd_noundef_low:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
-; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; SKX-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
@@ -203,16 +197,18 @@ define <4 x double> @fadd_noundef_high(<
; KNL: # %bb.0:
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
-; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vextractf64x4 $1, %zmm2, %ymm1
+; KNL-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: fadd_noundef_high:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
-; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vextractf64x4 $1, %zmm2, %ymm1
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
@@ -227,16 +223,14 @@ define <8 x i32> @hadd_16_3_sv(<16 x i32
; KNL: # %bb.0:
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; KNL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; KNL-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; KNL-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: hadd_16_3_sv:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; SKX-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; SKX-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
, i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
@@ -253,15 +247,13 @@ define double @fadd_noundef_eel(<8 x dou
; KNL-LABEL: fadd_noundef_eel:
; KNL: # %bb.0:
; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; KNL-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; KNL-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fadd_noundef_eel:
; SKX: # %bb.0:
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -277,18 +269,18 @@ define double @fsub_noundef_ee (<8 x dou
; KNL-LABEL: fsub_noundef_ee:
; KNL: # %bb.0:
; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
-; KNL-NEXT: vbroadcastsd %xmm0, %zmm0
-; KNL-NEXT: vsubpd %zmm1, %zmm0, %zmm0
-; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
+; KNL-NEXT: vbroadcastsd %xmm0, %zmm1
+; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm1
+; KNL-NEXT: vsubpd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; KNL-NEXT: retq
;
; SKX-LABEL: fsub_noundef_ee:
; SKX: # %bb.0:
; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0
-; SKX-NEXT: vsubpd %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: vbroadcastsd %xmm0, %zmm1
+; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm1
+; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Tue Oct 30 07:14:34 2018
@@ -851,7 +851,7 @@ define i8 @test_iinsertelement_v4i1(i32
; SKX-NEXT: kxorb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k1
-; SKX-NEXT: kxorb %k1, %k0, %k0
+; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
@@ -890,7 +890,7 @@ define i8 @test_iinsertelement_v2i1(i32
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftlb $1, %k1, %k1
-; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
@@ -1019,8 +1019,8 @@ define zeroext i8 @test_extractelement_v
; KNL: ## %bb.0:
; KNL-NEXT: vpminub %ymm3, %ymm1, %ymm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
@@ -1054,8 +1054,8 @@ define zeroext i8 @extractelement_v64i1_
; KNL: ## %bb.0:
; KNL-NEXT: vpminub %ymm3, %ymm1, %ymm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Tue Oct 30 07:14:34 2018
@@ -73,7 +73,7 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT: vpmovq2m %xmm0, %k1
; CHECK-NEXT: kshiftlb $2, %k0, %k0
-; CHECK-NEXT: korb %k0, %k1, %k0
+; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: retq
@@ -89,7 +89,7 @@ define <16 x i1> @test6(<2 x i1> %a, <2
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT: vpmovq2m %xmm0, %k1
; CHECK-NEXT: kshiftlb $2, %k0, %k0
-; CHECK-NEXT: korb %k0, %k1, %k0
+; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: vpmovm2b %k0, %xmm0
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Tue Oct 30 07:14:34 2018
@@ -240,21 +240,13 @@ define <4 x double> @signbits_sext_shuff
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X32: # %bb.0:
-; X32-NEXT: vpsrad $16, %xmm0, %xmm1
-; X32-NEXT: vpsrlq $16, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X32-NEXT: vpsrlq $16, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X64: # %bb.0:
-; X64-NEXT: vpsrad $16, %xmm0, %xmm1
-; X64-NEXT: vpsrlq $16, %xmm0, %xmm0
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X64-NEXT: vpsrlq $16, %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 16, i64 16>
Modified: llvm/trunk/test/CodeGen/X86/madd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/madd.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/madd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/madd.ll Tue Oct 30 07:14:34 2018
@@ -156,7 +156,7 @@ define i32 @_Z10test_shortPsS_i_256(i16*
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovd %xmm0, %eax
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
@@ -283,7 +283,7 @@ define i32 @_Z10test_shortPsS_i_512(i16*
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -310,7 +310,7 @@ define i32 @_Z10test_shortPsS_i_512(i16*
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -476,7 +476,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -508,7 +508,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -537,7 +537,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -739,7 +739,7 @@ define i32 @_Z9test_charPcS_i_256(i8* no
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovd %xmm0, %eax
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
@@ -875,7 +875,7 @@ define i32 @_Z9test_charPcS_i_512(i8* no
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -903,7 +903,7 @@ define i32 @_Z9test_charPcS_i_512(i8* no
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1087,7 +1087,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* n
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1121,7 +1121,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* n
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -1151,7 +1151,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* n
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1355,7 +1355,7 @@ define i32 @test_unsigned_short_256(i16*
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovd %xmm0, %eax
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
@@ -1510,7 +1510,7 @@ define i32 @test_unsigned_short_512(i16*
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1538,7 +1538,7 @@ define i32 @test_unsigned_short_512(i16*
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1763,7 +1763,7 @@ define i32 @test_unsigned_short_1024(i16
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1797,7 +1797,7 @@ define i32 @test_unsigned_short_1024(i16
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -2730,7 +2730,7 @@ define i32 @madd_double_reduction(<8 x i
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX256-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovd %xmm0, %eax
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll Tue Oct 30 07:14:34 2018
@@ -191,7 +191,7 @@ define i32 @_Z9test_charPcS_i_256(i8* no
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -257,7 +257,7 @@ define i32 @_Z9test_charPcS_i_512(i8* no
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -321,7 +321,7 @@ define i32 @sad_16i8_256() "min-legal-ve
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -383,7 +383,7 @@ define i32 @sad_16i8_512() "min-legal-ve
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sad.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sad.ll Tue Oct 30 07:14:34 2018
@@ -82,7 +82,7 @@ define i32 @sad_16i8() nounwind {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -107,7 +107,7 @@ define i32 @sad_16i8() nounwind {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -347,7 +347,7 @@ define i32 @sad_32i8() nounwind {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -374,7 +374,7 @@ define i32 @sad_32i8() nounwind {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -941,7 +941,7 @@ define i32 @sad_avx64i8() nounwind {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -989,7 +989,7 @@ define i32 @sad_avx64i8() nounwind {
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -1018,7 +1018,7 @@ define i32 @sad_avx64i8() nounwind {
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1456,7 +1456,7 @@ define i32 @sad_unroll_nonzero_initial(<
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1478,7 +1478,7 @@ define i32 @sad_unroll_nonzero_initial(<
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1558,7 +1558,7 @@ define i32 @sad_double_reduction(<16 x i
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1577,7 +1577,7 @@ define i32 @sad_double_reduction(<16 x i
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/shrink_vmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shrink_vmul.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shrink_vmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shrink_vmul.ll Tue Oct 30 07:14:34 2018
@@ -2475,7 +2475,7 @@ define void @PR34947(<9 x i16>* %p0, <9
; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT: movl $8199, %eax # imm = 0x2007
; X86-AVX2-NEXT: vmovd %eax, %xmm2
-; X86-AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
; X86-AVX2-NEXT: popl %esi
@@ -2723,7 +2723,7 @@ define void @PR34947(<9 x i16>* %p0, <9
; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: movl $8199, %eax # imm = 0x2007
; X64-AVX2-NEXT: vmovd %eax, %xmm2
-; X64-AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
; X64-AVX2-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Tue Oct 30 07:14:34 2018
@@ -663,32 +663,16 @@ define <2 x double> @uitofp_4i32_to_2f64
; SSE41-NEXT: addpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: uitofp_4i32_to_2f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: uitofp_4i32_to_2f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
-; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; VEX-LABEL: uitofp_4i32_to_2f64:
+; VEX: # %bb.0:
+; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
+; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
+; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
+; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i32_to_2f64:
; AVX512F: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll Tue Oct 30 07:14:34 2018
@@ -64,7 +64,7 @@ define i64 @test_v4f64_sext(<4 x double>
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -200,7 +200,7 @@ define i32 @test_v8f32_sext(<8 x float>
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -344,7 +344,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -510,7 +510,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -668,7 +668,7 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -695,7 +695,7 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -870,7 +870,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -899,7 +899,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll Tue Oct 30 07:14:34 2018
@@ -62,7 +62,7 @@ define i64 @test_v4f64_sext(<4 x double>
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -188,7 +188,7 @@ define i32 @test_v8f32_sext(<8 x float>
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -324,7 +324,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -477,7 +477,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -623,7 +623,7 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -649,7 +649,7 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -812,7 +812,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -840,7 +840,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll Tue Oct 30 07:14:34 2018
@@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -107,7 +107,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -119,7 +119,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -169,7 +169,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -182,7 +182,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -255,7 +255,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -267,7 +267,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -311,7 +311,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -325,7 +325,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -381,7 +381,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -396,7 +396,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -488,7 +488,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -503,7 +503,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -557,7 +557,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -574,7 +574,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -640,7 +640,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -658,7 +658,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -798,7 +798,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -815,7 +815,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -895,7 +895,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -914,7 +914,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1010,7 +1010,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1030,7 +1030,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
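
Every reduction hunk in this patch is the same rewrite: once only the low
128 bits of a wide binop feed the final scalar extract, the combine turns
(extract_subvector (binop X, Y), Index) into
(binop (extract_subvector X, Index), (extract_subvector Y, Index)), so the
last vpadd/vand/vmul runs on xmm registers and the ymm/zmm form disappears.
A minimal SelectionDAG sketch of that rewrite (hypothetical variable names,
not the patch code verbatim):

  // Sketch: Extract is (extract_subvector (BinOp X, Y), ExtractIndex) and
  // VT is the narrow result type. Elementwise binops commute with subvector
  // extraction, so the narrow form computes the same low elements.
  SDLoc DL(Extract);
  SDValue Idx = DAG.getIntPtrConstant(ExtractIndex, DL);
  SDValue NarrowX = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
                                BinOp.getOperand(0), Idx);
  SDValue NarrowY = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
                                BinOp.getOperand(1), Idx);
  return DAG.getNode(BinOp.getOpcode(), DL, VT, NarrowX, NarrowY);
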
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-and.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-and.ll Tue Oct 30 07:14:34 2018
@@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -94,7 +94,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -105,7 +105,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -117,7 +117,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -148,7 +148,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -161,7 +161,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -174,7 +174,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -235,7 +235,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -247,7 +247,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -259,7 +259,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -288,7 +288,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -301,7 +301,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -315,7 +315,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -350,7 +350,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -365,7 +365,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -380,7 +380,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -457,7 +457,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -472,7 +472,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -487,7 +487,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -523,7 +523,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -539,7 +539,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -556,7 +556,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -598,7 +598,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -616,7 +616,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -634,7 +634,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -757,7 +757,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -774,7 +774,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -791,7 +791,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -851,7 +851,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -869,7 +869,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -888,7 +888,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -958,7 +958,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -978,7 +978,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -998,7 +998,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
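
A second effect shows up in the AVX512 checks above: the wide vpandq/vpandd
zmm forms become plain vpand on xmm, since 128-bit logic ops do not need the
element-size-specific EVEX mnemonics. The combine itself only fires when the
target reports the subvector extract as free; a hedged sketch of that gate
(the exact bailout order in the patch may differ):

  // Sketch: give up unless the target says extracting the narrow type from
  // the wide binop type is cheap (the hook is enabled for x86, ARM, and
  // AArch64).
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isExtractSubvectorCheap(VT, WideBVT, ExtractIndex))
    return SDValue();
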
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll Tue Oct 30 07:14:34 2018
@@ -107,8 +107,7 @@ define float @test_v8f32(float %a0, <8 x
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -119,8 +118,7 @@ define float @test_v8f32(float %a0, <8 x
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
@@ -161,8 +159,7 @@ define float @test_v16f32(float %a0, <16
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -175,8 +172,7 @@ define float @test_v16f32(float %a0, <16
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
@@ -287,8 +283,7 @@ define float @test_v8f32_zero(<8 x float
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -299,8 +294,7 @@ define float @test_v8f32_zero(<8 x float
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
@@ -342,8 +336,7 @@ define float @test_v16f32_zero(<16 x flo
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -356,8 +349,7 @@ define float @test_v16f32_zero(<16 x flo
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
@@ -468,8 +460,7 @@ define float @test_v8f32_undef(<8 x floa
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -480,8 +471,7 @@ define float @test_v8f32_undef(<8 x floa
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
@@ -523,8 +513,7 @@ define float @test_v16f32_undef(<16 x fl
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -537,8 +526,7 @@ define float @test_v16f32_undef(<16 x fl
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
@@ -586,8 +574,7 @@ define double @test_v4f64(double %a0, <4
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -596,8 +583,7 @@ define double @test_v4f64(double %a0, <4
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
@@ -621,8 +607,7 @@ define double @test_v8f64(double %a0, <8
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -633,8 +618,7 @@ define double @test_v8f64(double %a0, <8
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
@@ -664,8 +648,7 @@ define double @test_v16f64(double %a0, <
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -677,8 +660,7 @@ define double @test_v16f64(double %a0, <
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
@@ -728,8 +710,7 @@ define double @test_v4f64_zero(<4 x doub
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -738,8 +719,7 @@ define double @test_v4f64_zero(<4 x doub
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
@@ -764,8 +744,7 @@ define double @test_v8f64_zero(<8 x doub
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -776,8 +755,7 @@ define double @test_v8f64_zero(<8 x doub
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
@@ -807,8 +785,7 @@ define double @test_v16f64_zero(<16 x do
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -820,8 +797,7 @@ define double @test_v16f64_zero(<16 x do
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
@@ -871,8 +847,7 @@ define double @test_v4f64_undef(<4 x dou
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -881,8 +856,7 @@ define double @test_v4f64_undef(<4 x dou
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
@@ -907,8 +881,7 @@ define double @test_v8f64_undef(<8 x dou
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -919,8 +892,7 @@ define double @test_v8f64_undef(<8 x dou
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
@@ -950,8 +922,7 @@ define double @test_v16f64_undef(<16 x d
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -963,8 +934,7 @@ define double @test_v16f64_undef(<16 x d
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll Tue Oct 30 07:14:34 2018
@@ -107,8 +107,7 @@ define float @test_v8f32(float %a0, <8 x
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -119,8 +118,7 @@ define float @test_v8f32(float %a0, <8 x
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
@@ -161,8 +159,7 @@ define float @test_v16f32(float %a0, <16
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -175,8 +172,7 @@ define float @test_v16f32(float %a0, <16
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
@@ -287,8 +283,7 @@ define float @test_v8f32_zero(<8 x float
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -299,8 +294,7 @@ define float @test_v8f32_zero(<8 x float
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
@@ -342,8 +336,7 @@ define float @test_v16f32_zero(<16 x flo
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -356,8 +349,7 @@ define float @test_v16f32_zero(<16 x flo
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
@@ -468,8 +460,7 @@ define float @test_v8f32_undef(<8 x floa
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -480,8 +471,7 @@ define float @test_v8f32_undef(<8 x floa
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
@@ -523,8 +513,7 @@ define float @test_v16f32_undef(<16 x fl
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -537,8 +526,7 @@ define float @test_v16f32_undef(<16 x fl
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
@@ -586,8 +574,7 @@ define double @test_v4f64(double %a0, <4
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT: vmulpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -596,8 +583,7 @@ define double @test_v4f64(double %a0, <4
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX512-NEXT: vmulpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
@@ -621,8 +607,7 @@ define double @test_v8f64(double %a0, <8
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -633,8 +618,7 @@ define double @test_v8f64(double %a0, <8
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
@@ -664,8 +648,7 @@ define double @test_v16f64(double %a0, <
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -677,8 +660,7 @@ define double @test_v16f64(double %a0, <
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
@@ -728,8 +710,7 @@ define double @test_v4f64_zero(<4 x doub
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -738,8 +719,7 @@ define double @test_v4f64_zero(<4 x doub
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
@@ -764,8 +744,7 @@ define double @test_v8f64_zero(<8 x doub
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -776,8 +755,7 @@ define double @test_v8f64_zero(<8 x doub
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
@@ -807,8 +785,7 @@ define double @test_v16f64_zero(<16 x do
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -820,8 +797,7 @@ define double @test_v16f64_zero(<16 x do
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
@@ -871,8 +847,7 @@ define double @test_v4f64_undef(<4 x dou
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -881,8 +856,7 @@ define double @test_v4f64_undef(<4 x dou
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
@@ -907,8 +881,7 @@ define double @test_v8f64_undef(<8 x dou
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -919,8 +892,7 @@ define double @test_v8f64_undef(<8 x dou
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
@@ -950,8 +922,7 @@ define double @test_v16f64_undef(<16 x d
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -963,8 +934,7 @@ define double @test_v16f64_undef(<16 x d
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll Tue Oct 30 07:14:34 2018
@@ -160,7 +160,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -184,7 +184,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BW-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -208,7 +208,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -229,7 +229,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQVL-NEXT: vpmullq %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512DQVL-NEXT: vpmullq %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -352,7 +352,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -385,7 +385,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -418,7 +418,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -442,7 +442,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -655,7 +655,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -696,7 +696,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -737,7 +737,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -763,7 +763,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -872,7 +872,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -884,7 +884,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -955,7 +955,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -969,7 +969,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1064,7 +1064,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1079,7 +1079,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1171,7 +1171,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -1186,7 +1186,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -1240,7 +1240,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -1257,7 +1257,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -1274,7 +1274,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BWVL-NEXT: vzeroupper
@@ -1290,7 +1290,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: vzeroupper
@@ -1306,7 +1306,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
@@ -1372,7 +1372,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -1390,7 +1390,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -1408,7 +1408,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BWVL-NEXT: vzeroupper
@@ -1426,7 +1426,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: vzeroupper
@@ -1444,7 +1444,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
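
Note that the vector-reduce-mul diffs show the narrowing firing in the
middle of the custom 64-bit multiply expansion (the vpmuludq/vpsllq/vpaddq
chains) and on AVX512DQ's vpmullq, not just at simple reduction tails: any
extract_subvector that lands directly on a wide binop is a candidate. The
transform hangs off the extract-subvector combine; roughly (sketch of the
call site, inferred from the function name):

  // Sketch: inside DAGCombiner::visitEXTRACT_SUBVECTOR.
  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;
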
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-or.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-or.ll Tue Oct 30 07:14:34 2018
@@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -94,7 +94,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -105,7 +105,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -117,7 +117,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -148,7 +148,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -161,7 +161,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -174,7 +174,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -235,7 +235,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -247,7 +247,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -259,7 +259,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -288,7 +288,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -301,7 +301,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -315,7 +315,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -350,7 +350,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -365,7 +365,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -380,7 +380,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -457,7 +457,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -472,7 +472,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -487,7 +487,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -523,7 +523,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -539,7 +539,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -556,7 +556,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -598,7 +598,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -616,7 +616,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -634,7 +634,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -757,7 +757,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -774,7 +774,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -791,7 +791,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -851,7 +851,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -869,7 +869,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -888,7 +888,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -958,7 +958,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -978,7 +978,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -998,7 +998,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
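
Every hunk in vector-reduce-or.ll above has the same shape: the reduction
ORs the high half of the vector into the low half, then keeps shuffling and
ORing within the low 128 bits until lane 0 holds the result. Because only
an extracted low subvector of each trailing wide OR is consumed, that OR is
rebuilt on the narrow operands, and vorps/vpor (plus the EVEX-only
vporq/vpord forms on AVX512) drop from ymm/zmm to xmm. The following is a
minimal IR sketch of that shape; the function name is made up for
illustration and is not one of the checked-in tests:

define i64 @or_reduce_v4i64_sketch(<4 x i64> %a0) {
  ; Fold the high 128-bit half into the low half (needs the full vector).
  %hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %or1 = or <4 x i64> %a0, %hi
  ; Fold lane 1 into lane 0.
  %lo = shufflevector <4 x i64> %or1, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; Only lane 0 of %or2 is extracted, so this wide OR is the op that now
  ; narrows to 128 bits (e.g. vporq %zmm -> vpor %xmm on AVX512).
  %or2 = or <4 x i64> %or1, %lo
  %r = extractelement <4 x i64> %or2, i32 0
  ret i64 %r
}

Feeding a file like this to llc (for example, llc < reduce.ll
-mtriple=x86_64-unknown-unknown -mattr=+avx2, mirroring the tests' RUN
lines; reduce.ll is a placeholder name) shows the final vpor operating on
xmm registers. The vector-reduce-xor.ll diffs below are the identical
transform with XOR in place of OR.
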
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-xor.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-xor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-xor.ll Tue Oct 30 07:14:34 2018
@@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -94,7 +94,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -105,7 +105,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -117,7 +117,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -148,7 +148,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -161,7 +161,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -174,7 +174,7 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -235,7 +235,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -247,7 +247,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -259,7 +259,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -288,7 +288,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -301,7 +301,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -315,7 +315,7 @@ define i32 @test_v16i32(<16 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -350,7 +350,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -365,7 +365,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -380,7 +380,7 @@ define i32 @test_v32i32(<32 x i32> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -457,7 +457,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -472,7 +472,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -487,7 +487,7 @@ define i16 @test_v16i16(<16 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -523,7 +523,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -539,7 +539,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -556,7 +556,7 @@ define i16 @test_v32i16(<32 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -598,7 +598,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -616,7 +616,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -634,7 +634,7 @@ define i16 @test_v64i16(<64 x i16> %a0)
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -757,7 +757,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -774,7 +774,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -791,7 +791,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -851,7 +851,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -869,7 +869,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -888,7 +888,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -958,7 +958,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -978,7 +978,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -998,7 +998,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll Tue Oct 30 07:14:34 2018
@@ -690,8 +690,8 @@ define <16 x i16> @splatvar_rotate_v16i1
; AVX2-NEXT: vpbroadcastw %xmm1, %ymm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
@@ -702,8 +702,8 @@ define <16 x i16> @splatvar_rotate_v16i1
; AVX512F-NEXT: vpbroadcastw %xmm1, %ymm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm1, %ymm0
@@ -714,8 +714,8 @@ define <16 x i16> @splatvar_rotate_v16i1
; AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm1, %ymm0
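
The rotate hunks above, and those in vector-rotate-512.ll below, show
another way the narrowing fires. The splatted-variable rotate lowers as
(shl x, amt) | (lshr x, 16 - amt), and vpsllw/vpsrlw read their count from
the low 64 bits of an xmm register prepared by vpmovzxwq. Because only the
low 128 bits of the "16 - splat" subtraction are ever consumed, vpsubw and
its all-16s constant operand narrow from ymm to xmm, halving the
constant-pool load from 32 bytes to 16 (and shrinking it from 64 bytes to
16 for the zmm byte rotates below). A minimal IR sketch of the pattern
follows, with a made-up function name; shifting a 16-bit lane by 16 is not
well-defined, so the sketch only illustrates the dataflow:

define <16 x i16> @splatvar_rotate_sketch(<16 x i16> %a, <16 x i16> %b) {
  ; Splat the rotate amount from lane 0 of %b.
  %amt = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shl = shl <16 x i16> %a, %amt
  ; Only the low xmm of %sub feeds the vpmovzxwq shift-count zext, so the
  ; subtract and its constant operand shrink to 128 bits.
  %sub = sub <16 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %amt
  %shr = lshr <16 x i16> %a, %sub
  %rot = or <16 x i16> %shl, %shr
  ret <16 x i16> %rot
}
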
Modified: llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll?rev=345602&r1=345601&r2=345602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll Tue Oct 30 07:14:34 2018
@@ -316,8 +316,8 @@ define <32 x i16> @splatvar_rotate_v32i1
; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm3
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm4
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %ymm3, %ymm5, %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm3, %xmm5, %xmm3
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
@@ -331,8 +331,8 @@ define <32 x i16> @splatvar_rotate_v32i1
; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm3
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %ymm3, %ymm5, %ymm3
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm3, %xmm5, %xmm3
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
@@ -468,14 +468,14 @@ define <64 x i8> @splatvar_rotate_v64i8(
; AVX512BW-LABEL: splatvar_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm2
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpsubb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3
; AVX512BW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT: vpsllw %xmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm3, %zmm1
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512BW-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw %xmm2, %zmm4, %zmm2
@@ -488,14 +488,14 @@ define <64 x i8> @splatvar_rotate_v64i8(
; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm2
-; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VLBW-NEXT: vpsubb %zmm2, %zmm3, %zmm2
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512VLBW-NEXT: vpandq %zmm1, %zmm3, %zmm1
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm4, %zmm2