[llvm] [DAG] narrowExtractedVectorBinOp - ensure we limit late node creation to LegalOperations only (PR #72130)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 20 02:36:49 PST 2023
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/72130
>From b5a4460bfb67f7a1d2981da04951d7bc96577ef6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 13 Nov 2023 16:04:24 +0000
Subject: [PATCH 1/2] [DAG] narrowExtractedVectorBinOp - ensure we limit late
node creation to LegalOperations only
Avoids infinite loop issues in some upcoming patches to help D152928 - x86 sees a number of regressions that will be addressed in follow-ups.
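For reviewers skimming the diffstat: the functional change is the single DAGCombiner.cpp hunk below, which threads LegalOperations into the narrow-binop legality check. A minimal sketch of the guarded path (simplified from that hunk; the LegalOnly parameter name follows the current TargetLowering declaration and is an annotation added here for clarity):

  // Narrow type for the candidate binop (the wide BV type split by NarrowingRatio).
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  // Before legalization this still accepts Legal/Custom/Promote narrow binops,
  // but once LegalOperations is set it insists on strictly Legal ones, so the
  // combine cannot keep recreating nodes that legalization would bounce back.
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
                                             /*LegalOnly=*/LegalOperations))
    return SDValue();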
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +-
.../CostModel/AArch64/vector-select.ll | 4 +-
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 13 +-
.../test/CodeGen/X86/avx512-insert-extract.ll | 14 +-
.../CodeGen/X86/combine-avx2-intrinsics.ll | 2 +-
.../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 17 +-
.../CodeGen/X86/horizontal-reduce-smax.ll | 24 +-
.../CodeGen/X86/horizontal-reduce-smin.ll | 24 +-
.../CodeGen/X86/horizontal-reduce-umax.ll | 58 ++---
.../CodeGen/X86/horizontal-reduce-umin.ll | 58 ++---
llvm/test/CodeGen/X86/kshift.ll | 10 +-
llvm/test/CodeGen/X86/var-permute-128.ll | 6 +-
.../X86/vector-reduce-fmax-fmin-fast.ll | 28 +--
.../CodeGen/X86/vector-reduce-fmax-nnan.ll | 28 +--
.../CodeGen/X86/vector-reduce-fmin-nnan.ll | 28 +--
llvm/test/CodeGen/X86/vector-reduce-mul.ll | 216 +++++++++---------
llvm/test/CodeGen/X86/vector-reduce-smax.ll | 18 +-
llvm/test/CodeGen/X86/vector-reduce-smin.ll | 18 +-
llvm/test/CodeGen/X86/vector-reduce-umax.ll | 42 ++--
llvm/test/CodeGen/X86/vector-reduce-umin.ll | 42 ++--
20 files changed, 328 insertions(+), 325 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ec0dc3a995b13b..2fc9a2866c32dba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24076,7 +24076,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
+ LegalOperations))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index 4477bfbf577f980..cf57479c395980a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -125,10 +125,10 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CODE: mov
; CODE: mov
; CODE: mov
-; CODE: ldr
-; CODE: cmge
; CODE: cmge
+; CODE: ldr
; CODE: bif
+; CODE: cmge
; CODE: bif
; CODE: ext
; CODE: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index df35b4ecb3d6623..5e477e8947d1b86 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1860,15 +1860,14 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-LABEL: umaxv_v3i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov v3.16b, v0.16b
-; CHECK-NEXT: mov v4.16b, v2.16b
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v3.d[1], v1.d[0]
-; CHECK-NEXT: mov v4.d[1], xzr
-; CHECK-NEXT: cmhi v3.2d, v3.2d, v4.2d
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: mov v3.d[1], xzr
+; CHECK-NEXT: cmhi v3.2d, v0.2d, v3.2d
; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: bif v0.8b, v2.8b, v3.8b
+; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: and v1.8b, v1.8b, v4.8b
; CHECK-NEXT: cmhi d2, d0, d1
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -1930,4 +1929,4 @@ define i128 @umaxv_v2i128(<2 x i128> %a) {
entry:
%arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
ret i128 %arg1
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 40cbbfe2c14e5d4..6d7c9701f14f42d 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1048,8 +1048,8 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) nounwind
define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwind {
; KNL-LABEL: test_extractelement_v32i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1077,11 +1077,10 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin
; KNL-LABEL: test_extractelement_v64i1:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1113,11 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind
; KNL-LABEL: extractelement_v64i1_alt:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
index 514e8d1fdd585a0..9cd606557dcfd78 100644
--- a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -113,7 +113,7 @@ define <4 x i32> @demandedelts_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: demandedelts_vpsrlvq:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index f4c43ff0cf9c875..063182fcecf3e43 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1068,18 +1068,21 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
;
; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: subq $40, %rsp
-; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; CHECK-AVX2-NEXT: vpsllvd %xmm0, %xmm1, %xmm0
-; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-AVX2-NEXT: subq $56, %rsp
+; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,0,0,2,2,0,0]
+; CHECK-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
+; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1092,7 +1095,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; CHECK-AVX2-NEXT: addq $40, %rsp
+; CHECK-AVX2-NEXT: addq $56, %rsp
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index 93049f9987a5e9b..f96aca9ac98d5f5 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -480,10 +480,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -553,10 +553,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1147,10 +1147,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1264,10 +1264,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 47bb0957f3fbb6a..cd1698431244c60 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -483,10 +483,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -557,10 +557,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1151,10 +1151,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1268,10 +1268,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index 5fde9bd5566b400..ef4e8c595cb8600 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -545,15 +545,15 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -636,14 +636,14 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; X64-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1256,14 +1256,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1401,14 +1401,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; X64-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 699dce75e505c77..7b09c73f8f74469 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -489,15 +489,15 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -582,14 +582,14 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1172,14 +1172,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1319,14 +1319,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index f4efacc1946cff9..5ba96d72960e70e 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -267,13 +267,13 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftl_v64i1_63:
; KNL: # %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
@@ -563,13 +563,13 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftr_v64i1_63:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k1
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 99a3821bb9ba91e..aa11ec54c80ac9f 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -1010,20 +1010,22 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
;
; AVX2-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpshufb %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm2
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
index 5bd9b0292a8f04e..e9b156b360752db 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -107,9 +107,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -118,9 +118,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -159,9 +159,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -172,9 +172,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -222,7 +222,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -231,7 +231,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -255,7 +255,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -266,7 +266,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -296,7 +296,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -308,7 +308,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 71c4427da96253e..963cc3ccb7f66a7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -112,9 +112,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -123,9 +123,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -164,9 +164,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -177,9 +177,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -269,7 +269,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -278,7 +278,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -302,7 +302,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -313,7 +313,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -343,7 +343,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -355,7 +355,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 0b2f9d69f0623c7..80f2b7628bb0717 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -153,9 +153,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -164,9 +164,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -205,9 +205,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -218,9 +218,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -268,7 +268,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -277,7 +277,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -301,7 +301,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -312,7 +312,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -342,7 +342,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -354,7 +354,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 0103b7622dc3e2f..f46c8ec4c48bd70 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -142,22 +142,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -166,22 +166,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BW-LABEL: test_v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -190,22 +190,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BWVL-LABEL: test_v4i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -333,22 +333,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -366,22 +366,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
+; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -399,22 +399,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
+; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
+; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -635,22 +635,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -676,22 +676,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
+; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -717,22 +717,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
+; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
+; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 80b6b7e27e89282..70ef2faf53b4337 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -180,10 +180,10 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -372,10 +372,10 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -697,10 +697,10 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 7ead5a0eda6d7d6..39e91a39825ea5f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -180,10 +180,10 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -372,10 +372,10 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -697,10 +697,10 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index 4799b8e7e5857b4..620b99d2652a15b 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -211,14 +211,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -431,14 +431,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -809,14 +809,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm2
-; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm3
-; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
-; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index 75eeec456c9ac38..b9c7d006a4afb22 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -212,14 +212,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -434,14 +434,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -813,14 +813,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
-; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
-; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
-; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
-; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
+; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
>From 92061a6aba81b41cc9757fea2eec5b80ad928a8d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 14 Nov 2023 09:53:25 +0000
Subject: [PATCH 2/2] [X86] SimplifyDemandedVectorEltsForTargetNode - add
additional 256-bit/512-bit X86 binops that can be narrowed to smaller vector
widths
---
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +
.../test/CodeGen/X86/avx512-insert-extract.ll | 4 +-
.../CodeGen/X86/combine-avx2-intrinsics.ll | 2 +-
.../CodeGen/X86/horizontal-reduce-smax.ll | 24 +-
.../CodeGen/X86/horizontal-reduce-smin.ll | 24 +-
.../CodeGen/X86/horizontal-reduce-umax.ll | 58 ++---
.../CodeGen/X86/horizontal-reduce-umin.ll | 58 ++---
llvm/test/CodeGen/X86/kshift.ll | 10 +-
llvm/test/CodeGen/X86/var-permute-128.ll | 6 +-
.../X86/vector-reduce-fmax-fmin-fast.ll | 28 +--
.../CodeGen/X86/vector-reduce-fmax-nnan.ll | 28 +--
.../CodeGen/X86/vector-reduce-fmin-nnan.ll | 28 +--
llvm/test/CodeGen/X86/vector-reduce-mul.ll | 216 +++++++++---------
llvm/test/CodeGen/X86/vector-reduce-smax.ll | 18 +-
llvm/test/CodeGen/X86/vector-reduce-smin.ll | 18 +-
llvm/test/CodeGen/X86/vector-reduce-umax.ll | 42 ++--
llvm/test/CodeGen/X86/vector-reduce-umin.ll | 42 ++--
17 files changed, 315 insertions(+), 303 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ff726110121c608..fbd241a9d8ec07c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41450,6 +41450,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Integer ops.
case X86ISD::PACKSS:
case X86ISD::PACKUS:
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT:
+ case X86ISD::PMULUDQ:
+ case X86ISD::PMULDQ:
+ case X86ISD::VSHLV:
+ case X86ISD::VSRLV:
+ case X86ISD::VSRAV:
+ // Float ops.
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
+ case X86ISD::FMAXC:
+ case X86ISD::FMINC:
// Horizontal Ops.
case X86ISD::HADD:
case X86ISD::HSUB:
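The new cases extend the existing narrowing in SimplifyDemandedVectorEltsForTargetNode: when none of the upper elements of a 256-bit or 512-bit node are demanded, the op can be performed at half width, which is what drives the ymm-to-xmm and zmm-to-xmm switches in the test diffs below. As a rough illustration only (this function is a hand-written sketch under that assumption and is not part of this patch's test suite), the following IR is the kind of pattern affected for PCMPGT, where only the low two elements of a v4i64 compare are ever used:

; Hand-written sketch, not from the patch: only the low 128 bits of the
; 256-bit signed-greater-than result are demanded, so the compare is a
; candidate for narrowing to a 128-bit (xmm) operation.
define <2 x i64> @narrow_pcmpgt_demanded_lo(<4 x i64> %a, <4 x i64> %b) {
  %cmp = icmp sgt <4 x i64> %a, %b
  %ext = sext <4 x i1> %cmp to <4 x i64>
  %lo = shufflevector <4 x i64> %ext, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %lo
}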
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 6d7c9701f14f42d..4a2dd7673f4e767 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1048,8 +1048,8 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) nounwind
define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwind {
; KNL-LABEL: test_extractelement_v32i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
+; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
index 9cd606557dcfd78..514e8d1fdd585a0 100644
--- a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -113,7 +113,7 @@ define <4 x i32> @demandedelts_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: demandedelts_vpsrlvq:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index f96aca9ac98d5f5..93049f9987a5e9b 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -480,10 +480,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -553,10 +553,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1147,10 +1147,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1264,10 +1264,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index cd1698431244c60..47bb0957f3fbb6a 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -483,10 +483,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -557,10 +557,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1151,10 +1151,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1268,10 +1268,10 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index ef4e8c595cb8600..5fde9bd5566b400 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -545,15 +545,15 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; X86-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -636,14 +636,14 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; X64-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1256,14 +1256,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; X86-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1401,14 +1401,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; X64-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X64-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 7b09c73f8f74469..699dce75e505c77 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -489,15 +489,15 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -582,14 +582,14 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
@@ -1172,14 +1172,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
@@ -1319,14 +1319,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index 5ba96d72960e70e..0acf82f5a144a2e 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -267,12 +267,13 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftl_v64i1_63:
; KNL: # %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
@@ -569,7 +570,8 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k1
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index aa11ec54c80ac9f..99a3821bb9ba91e 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -1010,22 +1010,20 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
;
; AVX2-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpshufb %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm2
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
index e9b156b360752db..5bd9b0292a8f04e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -107,9 +107,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -118,9 +118,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -159,9 +159,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -172,9 +172,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -222,7 +222,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -231,7 +231,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -255,7 +255,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -266,7 +266,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -296,7 +296,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -308,7 +308,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 963cc3ccb7f66a7..71c4427da96253e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -112,9 +112,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -123,9 +123,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -164,9 +164,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -177,9 +177,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -269,7 +269,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -278,7 +278,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -302,7 +302,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -313,7 +313,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -343,7 +343,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -355,7 +355,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 80f2b7628bb0717..0b2f9d69f0623c7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -153,9 +153,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -164,9 +164,9 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -205,9 +205,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -218,9 +218,9 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -268,7 +268,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -277,7 +277,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -301,7 +301,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -312,7 +312,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
@@ -342,7 +342,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -354,7 +354,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index f46c8ec4c48bd70..0103b7622dc3e2f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -142,22 +142,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -166,22 +166,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BW-LABEL: test_v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -190,22 +190,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BWVL-LABEL: test_v4i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -333,22 +333,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -366,22 +366,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -399,22 +399,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -635,22 +635,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -676,22 +676,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -717,22 +717,22 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 70ef2faf53b4337..80b6b7e27e89282 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -180,10 +180,10 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -372,10 +372,10 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -697,10 +697,10 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 39e91a39825ea5f..7ead5a0eda6d7d6 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -180,10 +180,10 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -372,10 +372,10 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -697,10 +697,10 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index 620b99d2652a15b..4799b8e7e5857b4 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -211,14 +211,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -431,14 +431,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -809,14 +809,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
-; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
-; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index b9c7d006a4afb22..75eeec456c9ac38 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -212,14 +212,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -434,14 +434,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
-; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -813,14 +813,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
-; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
-; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm0, %xmm2
+; AVX2-NEXT: vxorpd %xmm4, %xmm1, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper