[llvm] 05c0d34 - [X86][SSE] Prefer trunc(movd(x)) to pextrb(x,0)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 13 11:43:34 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-13T18:43:04Z
New Revision: 05c0d3491822b3a74f49be2fe8c8273e436ab7ec
URL: https://github.com/llvm/llvm-project/commit/05c0d3491822b3a74f49be2fe8c8273e436ab7ec
DIFF: https://github.com/llvm/llvm-project/commit/05c0d3491822b3a74f49be2fe8c8273e436ab7ec.diff
LOG: [X86][SSE] Prefer trunc(movd(x)) to pextrb(x,0)
If we're extracting the 0th element of a v16i8 vector, we're better off using MOVD than PEXTRB, unless we're storing the value or we require PEXTRB's implicit zero extension.
The biggest perf diff is on SLM targets, where MOVD (uops=1, lat=3, tp=1) is notably faster than PEXTRB (uops=2, lat=5, tp=4).
This matches what we already do for PEXTRW.
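For illustration only (not part of the patch), a minimal IR reproducer of the pattern this targets - an index-0 extract whose result is neither stored nor zero-extended - might look like the following, assuming SSE4.1 and a hypothetical function name:

  ; hypothetical example; extracts element 0 of a v16i8 and returns it as i8
  define i8 @extract_elt0(<16 x i8> %x) {
    %e = extractelement <16 x i8> %x, i64 0
    ret i8 %e
  }

With SSE4.1 this previously lowered to 'pextrb $0, %xmm0, %eax'; after this change it should lower to 'movd %xmm0, %eax' instead, matching the updated CHECK lines in the tests below.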
Differential Revision: https://reviews.llvm.org/D76138
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avg.ll
llvm/test/CodeGen/X86/avx512-vec3-crash.ll
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/test/CodeGen/X86/buildvec-insertvec.ll
llvm/test/CodeGen/X86/extract-concat.ll
llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/scalar_widen_div.ll
llvm/test/CodeGen/X86/var-permute-128.ll
llvm/test/CodeGen/X86/var-permute-512.ll
llvm/test/CodeGen/X86/vector-bitreverse.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
llvm/test/CodeGen/X86/vector-reduce-add.ll
llvm/test/CodeGen/X86/vector-reduce-and.ll
llvm/test/CodeGen/X86/vector-reduce-mul.ll
llvm/test/CodeGen/X86/vector-reduce-or.ll
llvm/test/CodeGen/X86/vector-reduce-smax.ll
llvm/test/CodeGen/X86/vector-reduce-smin.ll
llvm/test/CodeGen/X86/vector-reduce-umax.ll
llvm/test/CodeGen/X86/vector-reduce-umin.ll
llvm/test/CodeGen/X86/vector-reduce-xor.ll
llvm/test/CodeGen/X86/widen_bitops-0.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index caee2a4fb75d..f71ec4840409 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17830,6 +17830,14 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return SDValue();
if (VT.getSizeInBits() == 8) {
+ // If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
+ // we're going to zero extend the register or fold the store.
+ if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
+ !MayFoldIntoStore(Op))
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Vec), Idx));
+
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 98cb20fd5f99..1448ba0c39e2 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -2598,17 +2598,17 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512BW-NEXT: vmovq %xmm2, %rdx
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %r10
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %r15
; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT: vmovq %xmm2, %r8
; AVX512BW-NEXT: vpextrq $1, %xmm2, %r9
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX512BW-NEXT: vmovq %xmm2, %r14
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %r12
-; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT: vmovq %xmm2, %r11
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %r10
+; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX512BW-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512BW-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
@@ -2617,23 +2617,24 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX512BW-NEXT: vmovq %xmm3, %rax
-; AVX512BW-NEXT: addq %rbx, %rax
-; AVX512BW-NEXT: movq %rax, %rbx
-; AVX512BW-NEXT: vpextrq $1, %xmm3, %r13
-; AVX512BW-NEXT: addq %rbp, %r13
-; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm3
; AVX512BW-NEXT: vmovq %xmm3, %rcx
-; AVX512BW-NEXT: addq %rdi, %rcx
+; AVX512BW-NEXT: addq %rbx, %rcx
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT: addq %rbp, %rax
+; AVX512BW-NEXT: movq %rax, %rbp
+; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm3
+; AVX512BW-NEXT: vmovq %xmm3, %r14
+; AVX512BW-NEXT: addq %rdi, %r14
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: addq %rsi, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX512BW-NEXT: vmovq %xmm2, %r15
-; AVX512BW-NEXT: addq %rdx, %r15
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %rbp
-; AVX512BW-NEXT: addq %r10, %rbp
+; AVX512BW-NEXT: vmovq %xmm2, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
+; AVX512BW-NEXT: movq %rax, %rdx
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %r12
+; AVX512BW-NEXT: addq %r15, %r12
; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT: vmovq %xmm2, %rax
; AVX512BW-NEXT: addq %r8, %rax
@@ -2645,54 +2646,54 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512BW-NEXT: vmovq %xmm2, %rax
-; AVX512BW-NEXT: addq %r14, %rax
+; AVX512BW-NEXT: addq %r11, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512BW-NEXT: addq %r12, %rax
+; AVX512BW-NEXT: addq %r10, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX512BW-NEXT: vmovq %xmm2, %r12
-; AVX512BW-NEXT: addq %r11, %r12
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11
-; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; AVX512BW-NEXT: vmovq %xmm2, %r13
+; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %rbx
+; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512BW-NEXT: vmovq %xmm0, %r9
+; AVX512BW-NEXT: vmovq %xmm0, %r10
+; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9
; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; AVX512BW-NEXT: vpextrq $1, %xmm0, %r8
-; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
; AVX512BW-NEXT: vmovq %xmm1, %rax
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vmovq %xmm0, %rsi
-; AVX512BW-NEXT: addq %rax, %rsi
+; AVX512BW-NEXT: vmovq %xmm0, %r8
+; AVX512BW-NEXT: addq %rax, %r8
; AVX512BW-NEXT: vpextrq $1, %xmm1, %rdi
-; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX512BW-NEXT: addq %rdi, %rdx
-; AVX512BW-NEXT: addq $-1, %rbx
-; AVX512BW-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512BW-NEXT: movl $0, %r10d
-; AVX512BW-NEXT: adcq $-1, %r10
-; AVX512BW-NEXT: addq $-1, %r13
-; AVX512BW-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512BW-NEXT: movl $0, %eax
-; AVX512BW-NEXT: adcq $-1, %rax
-; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512BW-NEXT: addq %rdi, %rsi
; AVX512BW-NEXT: addq $-1, %rcx
-; AVX512BW-NEXT: movq %rcx, (%rsp) # 8-byte Spill
-; AVX512BW-NEXT: movl $0, %r14d
-; AVX512BW-NEXT: adcq $-1, %r14
-; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX512BW-NEXT: movl $0, %ebx
-; AVX512BW-NEXT: adcq $-1, %rbx
-; AVX512BW-NEXT: addq $-1, %r15
-; AVX512BW-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512BW-NEXT: movl $0, %r13d
-; AVX512BW-NEXT: adcq $-1, %r13
+; AVX512BW-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: movl $0, %r11d
+; AVX512BW-NEXT: adcq $-1, %r11
; AVX512BW-NEXT: addq $-1, %rbp
; AVX512BW-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: movl $0, %edi
+; AVX512BW-NEXT: adcq $-1, %rdi
+; AVX512BW-NEXT: addq $-1, %r14
+; AVX512BW-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: movl $0, %r15d
; AVX512BW-NEXT: adcq $-1, %r15
; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512BW-NEXT: movl $0, %r14d
+; AVX512BW-NEXT: adcq $-1, %r14
+; AVX512BW-NEXT: addq $-1, %rdx
+; AVX512BW-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: movl $0, %eax
+; AVX512BW-NEXT: adcq $-1, %rax
+; AVX512BW-NEXT: movq %rax, (%rsp) # 8-byte Spill
+; AVX512BW-NEXT: addq $-1, %r12
+; AVX512BW-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: movl $0, %r12d
+; AVX512BW-NEXT: adcq $-1, %r12
+; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512BW-NEXT: movl $0, %eax
; AVX512BW-NEXT: adcq $-1, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2708,14 +2709,17 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512BW-NEXT: movl $0, %eax
; AVX512BW-NEXT: adcq $-1, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512BW-NEXT: addq $-1, %r12
+; AVX512BW-NEXT: addq $-1, %r13
; AVX512BW-NEXT: movl $0, %eax
; AVX512BW-NEXT: adcq $-1, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512BW-NEXT: addq $-1, %r11
+; AVX512BW-NEXT: addq $-1, %rbx
; AVX512BW-NEXT: movl $0, %eax
; AVX512BW-NEXT: adcq $-1, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BW-NEXT: addq $-1, %r10
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: adcq $-1, %rdx
; AVX512BW-NEXT: addq $-1, %r9
; AVX512BW-NEXT: movl $0, %ecx
; AVX512BW-NEXT: adcq $-1, %rcx
@@ -2725,118 +2729,114 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512BW-NEXT: addq $-1, %rsi
; AVX512BW-NEXT: movl $0, %ebp
; AVX512BW-NEXT: adcq $-1, %rbp
-; AVX512BW-NEXT: addq $-1, %rdx
-; AVX512BW-NEXT: movl $0, %edi
-; AVX512BW-NEXT: adcq $-1, %rdi
-; AVX512BW-NEXT: shldq $63, %rdx, %rdi
-; AVX512BW-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: shldq $63, %rsi, %rbp
+; AVX512BW-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: shldq $63, %r8, %rax
; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512BW-NEXT: shldq $63, %r9, %rcx
-; AVX512BW-NEXT: movq %rcx, %r8
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %r11, %r9
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %r12, %r11
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; AVX512BW-NEXT: movq %rcx, %rbp
+; AVX512BW-NEXT: shldq $63, %r10, %rdx
+; AVX512BW-NEXT: movq %rdx, %r9
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rbx, %r10
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %r13, %r8
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %r12
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %rdi
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rax, %r13
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %rdx
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rax, %rbx
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; AVX512BW-NEXT: shldq $63, %rax, %rsi
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %r15
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rax, %rdx
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %r13
+; AVX512BW-NEXT: shldq $63, %rax, %r12
+; AVX512BW-NEXT: movq (%rsp), %rcx # 8-byte Reload
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rax, %rcx
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %rbx
-; AVX512BW-NEXT: movq (%rsp), %rax # 8-byte Reload
; AVX512BW-NEXT: shldq $63, %rax, %r14
-; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %rcx
+; AVX512BW-NEXT: shldq $63, %rax, %r15
; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX512BW-NEXT: shldq $63, %rax, %r10
-; AVX512BW-NEXT: vmovq %r10, %xmm0
-; AVX512BW-NEXT: vmovq %rcx, %xmm1
+; AVX512BW-NEXT: shldq $63, %rax, %rdi
+; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512BW-NEXT: shldq $63, %rax, %r11
+; AVX512BW-NEXT: vmovq %r11, %xmm0
+; AVX512BW-NEXT: vmovq %rdi, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vmovq %r14, %xmm2
-; AVX512BW-NEXT: vmovq %rbx, %xmm3
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm0, %xmm1
+; AVX512BW-NEXT: vmovq %r15, %xmm2
+; AVX512BW-NEXT: vmovq %r14, %xmm3
; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
-; AVX512BW-NEXT: vmovq %r13, %xmm1
-; AVX512BW-NEXT: vmovq %r15, %xmm2
+; AVX512BW-NEXT: vmovq %rcx, %xmm1
+; AVX512BW-NEXT: vmovq %r12, %xmm2
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %rsi, %xmm2
-; AVX512BW-NEXT: vmovq %rdx, %xmm3
+; AVX512BW-NEXT: vmovq %rdx, %xmm2
+; AVX512BW-NEXT: vmovq %rsi, %xmm3
; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %rdi, %xmm1
-; AVX512BW-NEXT: vmovq %r12, %xmm2
+; AVX512BW-NEXT: vmovq %rbx, %xmm1
+; AVX512BW-NEXT: vmovq %r13, %xmm2
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %r11, %xmm2
-; AVX512BW-NEXT: vmovq %r9, %xmm3
+; AVX512BW-NEXT: vmovq %r8, %xmm2
+; AVX512BW-NEXT: vmovq %r10, %xmm3
; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %r8, %xmm1
-; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Folded Reload
-; AVX512BW-NEXT: # xmm2 = mem[0],zero
+; AVX512BW-NEXT: vmovq %r9, %xmm1
+; AVX512BW-NEXT: vmovq %rbp, %xmm2
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %rbp, %xmm2
+; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Folded Reload
+; AVX512BW-NEXT: # xmm2 = mem[0],zero
; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Folded Reload
; AVX512BW-NEXT: # xmm3 = mem[0],zero
; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT: addq $24, %rsp
diff --git a/llvm/test/CodeGen/X86/avx512-vec3-crash.ll b/llvm/test/CodeGen/X86/avx512-vec3-crash.ll
index 5a3bc7f8b59c..a6e354b3330e 100644
--- a/llvm/test/CodeGen/X86/avx512-vec3-crash.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec3-crash.ll
@@ -12,7 +12,7 @@ define <3 x i8 > @foo(<3 x i8>%x, <3 x i8>%a, <3 x i8>%b) {
; CHECK-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1
; CHECK-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpextrb $0, %xmm0, %eax
+; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vpextrb $1, %xmm0, %edx
; CHECK-NEXT: vpextrb $2, %xmm0, %ecx
; CHECK-NEXT: # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index d0aa8f13d39e..66bb6aa3d662 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -158,7 +158,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; AVX512-NEXT: vpmovb2m %xmm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -311,7 +311,7 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX512-NEXT: vpmovw2m %ymm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -505,7 +505,7 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index 95d5e940265b..3add65914b58 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -23,8 +23,6 @@ define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
; SSE41-NEXT: cvttps2dq %xmm0, %xmm0
; SSE41-NEXT: pextrb $8, %xmm0, %eax
; SSE41-NEXT: pextrb $4, %xmm0, %ecx
-; SSE41-NEXT: pextrb $0, %xmm0, %edx
-; SSE41-NEXT: movd %edx, %xmm0
; SSE41-NEXT: pinsrb $1, %ecx, %xmm0
; SSE41-NEXT: pinsrb $2, %eax, %xmm0
; SSE41-NEXT: movl $255, %eax
diff --git a/llvm/test/CodeGen/X86/extract-concat.ll b/llvm/test/CodeGen/X86/extract-concat.ll
index b31e20196c5f..b860b7281ee6 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -10,8 +10,6 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
; SSE42-NEXT: pextrb $8, %xmm0, %eax
; SSE42-NEXT: pextrb $4, %xmm0, %ecx
-; SSE42-NEXT: pextrb $0, %xmm0, %edx
-; SSE42-NEXT: movd %edx, %xmm0
; SSE42-NEXT: pinsrb $1, %ecx, %xmm0
; SSE42-NEXT: pinsrb $2, %eax, %xmm0
; SSE42-NEXT: movl $255, %eax
@@ -24,8 +22,6 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpextrb $8, %xmm0, %eax
; AVX-NEXT: vpextrb $4, %xmm0, %ecx
-; AVX-NEXT: vpextrb $0, %xmm0, %edx
-; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX-NEXT: movl $255, %eax
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index 9419aff77ba0..dc8f60248c67 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -310,7 +310,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -321,7 +321,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $127, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
@@ -365,7 +365,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -376,7 +376,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $127, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
@@ -876,7 +876,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -889,7 +889,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -903,7 +903,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -954,7 +954,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -967,7 +967,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -981,7 +981,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -995,7 +995,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -1662,7 +1662,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -1678,7 +1678,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -1693,7 +1693,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -1756,7 +1756,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -1772,7 +1772,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -1787,7 +1787,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -1803,7 +1803,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -2028,7 +2028,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2039,7 +2039,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $127, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2084,7 +2084,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2095,7 +2095,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $127, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
@@ -2156,7 +2156,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2167,7 +2167,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $127, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2212,7 +2212,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2223,7 +2223,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $127, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 4153e2ffef27..7867b5306bf7 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -312,7 +312,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -323,7 +323,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
@@ -367,7 +367,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -378,7 +378,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
@@ -880,7 +880,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -893,7 +893,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -907,7 +907,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -958,7 +958,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -971,7 +971,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -985,7 +985,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -999,7 +999,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -1666,7 +1666,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -1682,7 +1682,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -1697,7 +1697,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -1760,7 +1760,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -1776,7 +1776,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -1791,7 +1791,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -1807,7 +1807,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -2032,7 +2032,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2043,7 +2043,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2088,7 +2088,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2099,7 +2099,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
@@ -2160,7 +2160,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2171,7 +2171,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2216,7 +2216,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2227,7 +2227,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index a049e664de44..6e59cd046cb0 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -366,7 +366,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -378,7 +378,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
@@ -407,7 +407,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -419,7 +419,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: retq
@@ -431,7 +431,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
@@ -442,7 +442,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: retq
@@ -988,7 +988,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -1002,7 +1002,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -1017,7 +1017,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -1049,7 +1049,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -1063,7 +1063,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -1078,7 +1078,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -1092,7 +1092,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -1835,7 +1835,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -1852,7 +1852,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
@@ -1868,7 +1868,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
@@ -1904,7 +1904,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -1921,7 +1921,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -1937,7 +1937,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -1953,7 +1953,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -2229,7 +2229,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2241,7 +2241,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2271,7 +2271,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2283,7 +2283,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -2296,7 +2296,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -2308,7 +2308,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
@@ -2354,7 +2354,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
@@ -2366,7 +2366,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
@@ -2396,7 +2396,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
@@ -2408,7 +2408,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
@@ -2421,7 +2421,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
@@ -2433,7 +2433,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 9cdba2f5fd99..413b5f2ac4aa 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -335,7 +335,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -344,7 +344,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
@@ -370,7 +370,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -379,7 +379,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -906,7 +906,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -917,7 +917,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -929,7 +929,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -958,7 +958,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -969,7 +969,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -981,7 +981,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -993,7 +993,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1717,7 +1717,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1731,7 +1731,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -1744,7 +1744,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -1777,7 +1777,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1791,7 +1791,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1804,7 +1804,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1818,7 +1818,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -2025,7 +2025,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -2034,7 +2034,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
@@ -2061,7 +2061,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -2070,7 +2070,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
@@ -2113,7 +2113,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -2122,7 +2122,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
@@ -2149,7 +2149,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -2158,7 +2158,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index f658df20990b..d21b44158068 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -261,8 +261,8 @@ define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
; CHECK-NEXT: cbtw
; CHECK-NEXT: idivb %cl
; CHECK-NEXT: movsbl %ah, %ecx
-; CHECK-NEXT: pextrb $0, %xmm1, %edx
-; CHECK-NEXT: pextrb $0, %xmm0, %eax
+; CHECK-NEXT: movd %xmm1, %edx
+; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: cbtw
; CHECK-NEXT: idivb %dl
; CHECK-NEXT: movsbl %ah, %eax
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index f16331144666..9767f8624572 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -922,7 +922,7 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
; SSE41-NEXT: movq %rsp, %rbp
; SSE41-NEXT: andq $-32, %rsp
; SSE41-NEXT: subq $544, %rsp # imm = 0x220
-; SSE41-NEXT: pextrb $0, %xmm2, %eax
+; SSE41-NEXT: movd %xmm2, %eax
; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE41-NEXT: andl $31, %eax
diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll
index d2c7b94cb332..58cad0b11c4a 100644
--- a/llvm/test/CodeGen/X86/var-permute-512.ll
+++ b/llvm/test/CodeGen/X86/var-permute-512.ll
@@ -390,7 +390,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm4
-; AVX512F-NEXT: vpextrb $0, %xmm4, %eax
+; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp)
@@ -567,7 +567,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, 4032(%rsp,%rax), %xmm0, %xmm0
-; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl 2048(%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
@@ -616,7 +616,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, 3008(%rsp,%rax), %xmm4, %xmm2
-; AVX512F-NEXT: vpextrb $0, %xmm3, %eax
+; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl 1024(%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
@@ -665,7 +665,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vpextrb $15, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, 1984(%rsp,%rax), %xmm4, %xmm3
-; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
@@ -730,7 +730,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax
+; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
@@ -780,7 +780,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax
+; AVX512BW-NEXT: vmovd %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
@@ -832,7 +832,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
+; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
@@ -885,7 +885,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
+; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index f421ac303d87..5d9cd1643aec 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -54,7 +54,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; XOP: # %bb.0:
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
-; XOP-NEXT: vpextrb $0, %xmm0, %eax
+; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: # kill: def $al killed $al killed $eax
; XOP-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index abd4002a762d..d1aed7076a88 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -1059,8 +1059,8 @@ define <16 x i8> @test_rem_variable_16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE41-NEXT: cbtw
; SSE41-NEXT: idivb %cl
; SSE41-NEXT: movsbl %ah, %ecx
-; SSE41-NEXT: pextrb $0, %xmm1, %edx
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm1, %edx
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: cbtw
; SSE41-NEXT: idivb %dl
; SSE41-NEXT: movsbl %ah, %eax
@@ -1160,8 +1160,8 @@ define <16 x i8> @test_rem_variable_16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX-NEXT: cbtw
; AVX-NEXT: idivb %cl
; AVX-NEXT: movsbl %ah, %ecx
-; AVX-NEXT: vpextrb $0, %xmm1, %edx
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm1, %edx
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: cbtw
; AVX-NEXT: idivb %dl
; AVX-NEXT: movsbl %ah, %eax
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll
index 60636a86da8d..e4f785dca2b1 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll
@@ -942,29 +942,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: paddb %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -972,7 +963,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0)
@@ -980,27 +971,16 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v2i8_load(<2 x i8>* %p) {
-; SSE2-LABEL: test_v2i8_load:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movzwl (%rdi), %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8_load:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movzwl (%rdi), %eax
-; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8_load:
+; SSE: # %bb.0:
+; SSE-NEXT: movzwl (%rdi), %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: paddb %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8_load:
; AVX: # %bb.0:
@@ -1008,7 +988,7 @@ define i8 @test_v2i8_load(<2 x i8>* %p) {
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1018,7 +998,7 @@ define i8 @test_v2i8_load(<2 x i8>* %p) {
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%a0 = load <2 x i8>, <2 x i8>* %p
@@ -1041,7 +1021,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; SSE41-NEXT: psadbw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1050,7 +1030,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1060,7 +1040,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0)
@@ -1068,30 +1048,21 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v4i8_load(<4 x i8>* %p) {
-; SSE2-LABEL: test_v4i8_load:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: psadbw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8_load:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: psadbw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8_load:
+; SSE: # %bb.0:
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psadbw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8_load:
; AVX: # %bb.0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1100,7 +1071,7 @@ define i8 @test_v4i8_load(<4 x i8>* %p) {
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%a0 = load <4 x i8>, <4 x i8>* %p
@@ -1109,27 +1080,19 @@ define i8 @test_v4i8_load(<4 x i8>* %p) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: psadbw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: psadbw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psadbw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1137,7 +1100,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0)
@@ -1145,30 +1108,21 @@ define i8 @test_v8i8(<8 x i8> %a0) {
}
define i8 @test_v8i8_load(<8 x i8>* %p) {
-; SSE2-LABEL: test_v8i8_load:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: psadbw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8_load:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: psadbw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8_load:
+; SSE: # %bb.0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psadbw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8_load:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1177,7 +1131,7 @@ define i8 @test_v8i8_load(<8 x i8>* %p) {
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%a0 = load <8 x i8>, <8 x i8>* %p
@@ -1186,25 +1140,15 @@ define i8 @test_v8i8_load(<8 x i8>* %p) {
}
define i8 @test_v16i8(<16 x i8> %a0) {
-; SSE2-LABEL: test_v16i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: psadbw %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm0
-; SSE41-NEXT: psadbw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: paddb %xmm0, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psadbw %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
@@ -1212,7 +1156,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1222,7 +1166,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0)
@@ -1230,27 +1174,16 @@ define i8 @test_v16i8(<16 x i8> %a0) {
}
define i8 @test_v32i8(<32 x i8> %a0) {
-; SSE2-LABEL: test_v32i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: paddb %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: psadbw %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v32i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: paddb %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm0
-; SSE41-NEXT: psadbw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: paddb %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: paddb %xmm0, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psadbw %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
@@ -1260,7 +1193,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1273,7 +1206,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1286,7 +1219,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1295,31 +1228,18 @@ define i8 @test_v32i8(<32 x i8> %a0) {
}
define i8 @test_v64i8(<64 x i8> %a0) {
-; SSE2-LABEL: test_v64i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: paddb %xmm3, %xmm1
-; SSE2-NEXT: paddb %xmm2, %xmm1
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: paddb %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: psadbw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v64i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: paddb %xmm3, %xmm1
-; SSE41-NEXT: paddb %xmm2, %xmm1
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT: paddb %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: psadbw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v64i8:
+; SSE: # %bb.0:
+; SSE-NEXT: paddb %xmm3, %xmm1
+; SSE-NEXT: paddb %xmm2, %xmm1
+; SSE-NEXT: paddb %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: paddb %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psadbw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
@@ -1332,7 +1252,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1346,7 +1266,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1361,7 +1281,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1370,39 +1290,22 @@ define i8 @test_v64i8(<64 x i8> %a0) {
}
define i8 @test_v128i8(<128 x i8> %a0) {
-; SSE2-LABEL: test_v128i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: paddb %xmm7, %xmm3
-; SSE2-NEXT: paddb %xmm5, %xmm3
-; SSE2-NEXT: paddb %xmm1, %xmm3
-; SSE2-NEXT: paddb %xmm6, %xmm2
-; SSE2-NEXT: paddb %xmm4, %xmm2
-; SSE2-NEXT: paddb %xmm3, %xmm2
-; SSE2-NEXT: paddb %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE2-NEXT: paddb %xmm2, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: psadbw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v128i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: paddb %xmm7, %xmm3
-; SSE41-NEXT: paddb %xmm5, %xmm3
-; SSE41-NEXT: paddb %xmm1, %xmm3
-; SSE41-NEXT: paddb %xmm6, %xmm2
-; SSE41-NEXT: paddb %xmm4, %xmm2
-; SSE41-NEXT: paddb %xmm3, %xmm2
-; SSE41-NEXT: paddb %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT: paddb %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: psadbw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v128i8:
+; SSE: # %bb.0:
+; SSE-NEXT: paddb %xmm7, %xmm3
+; SSE-NEXT: paddb %xmm5, %xmm3
+; SSE-NEXT: paddb %xmm1, %xmm3
+; SSE-NEXT: paddb %xmm6, %xmm2
+; SSE-NEXT: paddb %xmm4, %xmm2
+; SSE-NEXT: paddb %xmm3, %xmm2
+; SSE-NEXT: paddb %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT: paddb %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psadbw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
@@ -1421,7 +1324,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1437,7 +1340,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1453,7 +1356,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll
index d3f63f6c887a..2fc924b1b125 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll
@@ -684,29 +684,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0)
@@ -714,29 +705,17 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v4i8(<4 x i8> %a0) {
-; SSE2-LABEL: test_v4i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
@@ -744,7 +723,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0)
@@ -752,33 +731,19 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
@@ -788,7 +753,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0)
@@ -796,37 +761,21 @@ define i8 @test_v8i8(<8 x i8> %a0) {
}
define i8 @test_v16i8(<16 x i8> %a0) {
-; SSE2-LABEL: test_v16i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
@@ -838,7 +787,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0)
@@ -846,39 +795,22 @@ define i8 @test_v16i8(<16 x i8> %a0) {
}
define i8 @test_v32i8(<32 x i8> %a0) {
-; SSE2-LABEL: test_v32i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v32i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
@@ -892,7 +824,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -909,7 +841,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -926,7 +858,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -935,43 +867,24 @@ define i8 @test_v32i8(<32 x i8> %a0) {
}
define i8 @test_v64i8(<64 x i8> %a0) {
-; SSE2-LABEL: test_v64i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v64i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm3, %xmm1
-; SSE41-NEXT: pand %xmm2, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v64i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
@@ -986,7 +899,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1004,7 +917,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1023,7 +936,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1032,51 +945,28 @@ define i8 @test_v64i8(<64 x i8> %a0) {
}
define i8 @test_v128i8(<128 x i8> %a0) {
-; SSE2-LABEL: test_v128i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand %xmm6, %xmm2
-; SSE2-NEXT: pand %xmm7, %xmm3
-; SSE2-NEXT: pand %xmm5, %xmm3
-; SSE2-NEXT: pand %xmm1, %xmm3
-; SSE2-NEXT: pand %xmm4, %xmm2
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v128i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm6, %xmm2
-; SSE41-NEXT: pand %xmm7, %xmm3
-; SSE41-NEXT: pand %xmm5, %xmm3
-; SSE41-NEXT: pand %xmm1, %xmm3
-; SSE41-NEXT: pand %xmm4, %xmm2
-; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: pand %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v128i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pand %xmm6, %xmm2
+; SSE-NEXT: pand %xmm7, %xmm3
+; SSE-NEXT: pand %xmm5, %xmm3
+; SSE-NEXT: pand %xmm1, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: pand %xmm3, %xmm2
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
@@ -1093,7 +983,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1113,7 +1003,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1133,7 +1023,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index ad229da8241c..39b189c157ce 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -1531,29 +1531,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pmullw %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pmullw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pmullw %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1561,7 +1552,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0)
@@ -1591,7 +1582,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmullw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1602,7 +1593,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1613,7 +1604,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
@@ -1625,7 +1616,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
;
@@ -1636,7 +1627,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0)
@@ -1671,7 +1662,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmullw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1684,7 +1675,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1697,7 +1688,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
@@ -1711,7 +1702,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
;
@@ -1724,7 +1715,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0)
@@ -1783,7 +1774,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pmullw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1801,7 +1792,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
@@ -1817,7 +1808,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: retq
;
@@ -1838,7 +1829,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1860,7 +1851,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1885,7 +1876,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -1910,7 +1901,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQVL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -1984,7 +1975,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pmullw %xmm2, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -2007,7 +1998,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2039,7 +2030,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2067,7 +2058,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2095,7 +2086,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -2127,7 +2118,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -2159,7 +2150,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -2256,7 +2247,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pmullw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -2288,7 +2279,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2323,7 +2314,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2364,7 +2355,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2405,7 +2396,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BWVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -2441,7 +2432,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512DQ-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -2482,7 +2473,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
@@ -2632,7 +2623,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pmullw %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -2682,7 +2673,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2725,7 +2716,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2774,7 +2765,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2823,7 +2814,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BWVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -2866,7 +2857,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512DQ-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -2914,7 +2905,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512DQVL-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index 0ad1283a5f00..14eb3d27d8df 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -684,29 +684,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
@@ -714,29 +705,17 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v4i8(<4 x i8> %a0) {
-; SSE2-LABEL: test_v4i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
@@ -744,7 +723,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
@@ -752,33 +731,19 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
@@ -788,7 +753,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
@@ -796,37 +761,21 @@ define i8 @test_v8i8(<8 x i8> %a0) {
}
define i8 @test_v16i8(<16 x i8> %a0) {
-; SSE2-LABEL: test_v16i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
@@ -838,7 +787,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
@@ -846,39 +795,22 @@ define i8 @test_v16i8(<16 x i8> %a0) {
}
define i8 @test_v32i8(<32 x i8> %a0) {
-; SSE2-LABEL: test_v32i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v32i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
@@ -892,7 +824,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -909,7 +841,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -926,7 +858,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -935,43 +867,24 @@ define i8 @test_v32i8(<32 x i8> %a0) {
}
define i8 @test_v64i8(<64 x i8> %a0) {
-; SSE2-LABEL: test_v64i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: por %xmm3, %xmm1
-; SSE2-NEXT: por %xmm2, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v64i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm3, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v64i8:
+; SSE: # %bb.0:
+; SSE-NEXT: por %xmm3, %xmm1
+; SSE-NEXT: por %xmm2, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
@@ -986,7 +899,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1004,7 +917,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1023,7 +936,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1032,51 +945,28 @@ define i8 @test_v64i8(<64 x i8> %a0) {
}
define i8 @test_v128i8(<128 x i8> %a0) {
-; SSE2-LABEL: test_v128i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: por %xmm6, %xmm2
-; SSE2-NEXT: por %xmm7, %xmm3
-; SSE2-NEXT: por %xmm5, %xmm3
-; SSE2-NEXT: por %xmm1, %xmm3
-; SSE2-NEXT: por %xmm4, %xmm2
-; SSE2-NEXT: por %xmm3, %xmm2
-; SSE2-NEXT: por %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE2-NEXT: por %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v128i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm6, %xmm2
-; SSE41-NEXT: por %xmm7, %xmm3
-; SSE41-NEXT: por %xmm5, %xmm3
-; SSE41-NEXT: por %xmm1, %xmm3
-; SSE41-NEXT: por %xmm4, %xmm2
-; SSE41-NEXT: por %xmm3, %xmm2
-; SSE41-NEXT: por %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT: por %xmm2, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v128i8:
+; SSE: # %bb.0:
+; SSE-NEXT: por %xmm6, %xmm2
+; SSE-NEXT: por %xmm7, %xmm3
+; SSE-NEXT: por %xmm5, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: por %xmm4, %xmm2
+; SSE-NEXT: por %xmm3, %xmm2
+; SSE-NEXT: por %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
@@ -1093,7 +983,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1113,7 +1003,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1133,7 +1023,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 8a184cd78e48..1160fbbc48f6 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -1400,7 +1400,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pmaxsb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1408,7 +1408,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1416,7 +1416,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0)
@@ -1452,7 +1452,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1462,7 +1462,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1472,7 +1472,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0)
@@ -1516,7 +1516,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pmaxsb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1528,7 +1528,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1540,7 +1540,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0)
@@ -1587,7 +1587,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1598,7 +1598,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -1609,7 +1609,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -1663,7 +1663,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1676,7 +1676,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1690,7 +1690,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1704,7 +1704,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1771,7 +1771,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1787,7 +1787,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1802,7 +1802,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1818,7 +1818,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1909,7 +1909,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm2, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1931,7 +1931,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1948,7 +1948,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1965,7 +1965,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 884b59d72c76..edd6b7f7fd32 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -1398,7 +1398,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminsb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1406,7 +1406,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1414,7 +1414,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0)
@@ -1450,7 +1450,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1460,7 +1460,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1470,7 +1470,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0)
@@ -1514,7 +1514,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminsb %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1526,7 +1526,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1538,7 +1538,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0)
@@ -1585,7 +1585,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1596,7 +1596,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorb $-128, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -1607,7 +1607,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -1661,7 +1661,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1674,7 +1674,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1688,7 +1688,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1702,7 +1702,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1769,7 +1769,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1785,7 +1785,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1800,7 +1800,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1816,7 +1816,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1907,7 +1907,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm2, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1929,7 +1929,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1946,7 +1946,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1963,7 +1963,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index f5a8281f3ca4..59bcf2da6049 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -1614,29 +1614,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pmaxub %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pmaxub %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1644,7 +1635,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0)
@@ -1652,29 +1643,17 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v4i8(<4 x i8> %a0) {
-; SSE2-LABEL: test_v4i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pmaxub %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pmaxub %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
@@ -1682,7 +1661,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1692,7 +1671,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0)
@@ -1700,33 +1679,19 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pmaxub %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pmaxub %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pmaxub %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pmaxub %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
@@ -1736,7 +1701,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1748,7 +1713,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0)
@@ -1780,7 +1745,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1792,7 +1757,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -1804,7 +1769,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -1816,7 +1781,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
@@ -1851,7 +1816,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1865,7 +1830,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1880,7 +1845,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1894,7 +1859,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -1908,7 +1873,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
@@ -1948,7 +1913,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -1965,7 +1930,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -1981,7 +1946,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -1997,7 +1962,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -2013,7 +1978,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
@@ -2061,7 +2026,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
@@ -2084,7 +2049,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -2102,7 +2067,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -2119,7 +2084,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -2136,7 +2101,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index 39a45529a287..25e3938a6881 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -1519,29 +1519,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pminub %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pminub %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1549,7 +1540,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0)
@@ -1557,29 +1548,17 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v4i8(<4 x i8> %a0) {
-; SSE2-LABEL: test_v4i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pminub %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pminub %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pminub %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pminub %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
@@ -1587,7 +1566,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1597,7 +1576,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0)
@@ -1605,33 +1584,19 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pminub %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pminub %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pminub %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pminub %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pminub %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pminub %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
@@ -1641,7 +1606,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1653,7 +1618,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0)
@@ -1683,7 +1648,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1692,7 +1657,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -1701,7 +1666,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0)
@@ -1733,7 +1698,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1744,7 +1709,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1756,7 +1721,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1768,7 +1733,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1805,7 +1770,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1819,7 +1784,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1832,7 +1797,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1846,7 +1811,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1891,7 +1856,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm2, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@@ -1911,7 +1876,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1926,7 +1891,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1941,7 +1906,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
index 812259a334a1..35e6db38a584 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
@@ -684,29 +684,20 @@ define i16 @test_v64i16(<64 x i16> %a0) {
;
define i8 @test_v2i8(<2 x i8> %a0) {
-; SSE2-LABEL: test_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0)
@@ -714,29 +705,17 @@ define i8 @test_v2i8(<2 x i8> %a0) {
}
define i8 @test_v4i8(<4 x i8> %a0) {
-; SSE2-LABEL: test_v4i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
@@ -744,7 +723,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0)
@@ -752,33 +731,19 @@ define i8 @test_v4i8(<4 x i8> %a0) {
}
define i8 @test_v8i8(<8 x i8> %a0) {
-; SSE2-LABEL: test_v8i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v8i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
@@ -788,7 +753,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0)
@@ -796,37 +761,21 @@ define i8 @test_v8i8(<8 x i8> %a0) {
}
define i8 @test_v16i8(<16 x i8> %a0) {
-; SSE2-LABEL: test_v16i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v16i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
@@ -838,7 +787,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0)
@@ -846,39 +795,22 @@ define i8 @test_v16i8(<16 x i8> %a0) {
}
define i8 @test_v32i8(<32 x i8> %a0) {
-; SSE2-LABEL: test_v32i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v32i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
@@ -892,7 +824,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -909,7 +841,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -926,7 +858,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -935,43 +867,24 @@ define i8 @test_v32i8(<32 x i8> %a0) {
}
define i8 @test_v64i8(<64 x i8> %a0) {
-; SSE2-LABEL: test_v64i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm3, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v64i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm3, %xmm1
-; SSE41-NEXT: pxor %xmm2, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v64i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
@@ -986,7 +899,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1004,7 +917,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1023,7 +936,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1032,51 +945,28 @@ define i8 @test_v64i8(<64 x i8> %a0) {
}
define i8 @test_v128i8(<128 x i8> %a0) {
-; SSE2-LABEL: test_v128i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm6, %xmm2
-; SSE2-NEXT: pxor %xmm7, %xmm3
-; SSE2-NEXT: pxor %xmm5, %xmm3
-; SSE2-NEXT: pxor %xmm1, %xmm3
-; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm3, %xmm2
-; SSE2-NEXT: pxor %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: test_v128i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm6, %xmm2
-; SSE41-NEXT: pxor %xmm7, %xmm3
-; SSE41-NEXT: pxor %xmm5, %xmm3
-; SSE41-NEXT: pxor %xmm1, %xmm3
-; SSE41-NEXT: pxor %xmm4, %xmm2
-; SSE41-NEXT: pxor %xmm3, %xmm2
-; SSE41-NEXT: pxor %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $0, %xmm1, %eax
-; SSE41-NEXT: # kill: def $al killed $al killed $eax
-; SSE41-NEXT: retq
+; SSE-LABEL: test_v128i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm6, %xmm2
+; SSE-NEXT: pxor %xmm7, %xmm3
+; SSE-NEXT: pxor %xmm5, %xmm3
+; SSE-NEXT: pxor %xmm1, %xmm3
+; SSE-NEXT: pxor %xmm4, %xmm2
+; SSE-NEXT: pxor %xmm3, %xmm2
+; SSE-NEXT: pxor %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
+; SSE-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
@@ -1093,7 +983,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1113,7 +1003,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1133,7 +1023,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/widen_bitops-0.ll b/llvm/test/CodeGen/X86/widen_bitops-0.ll
index ec7ce9934a98..02a665d15a61 100644
--- a/llvm/test/CodeGen/X86/widen_bitops-0.ll
+++ b/llvm/test/CodeGen/X86/widen_bitops-0.ll
@@ -138,7 +138,7 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pand %xmm0, %xmm1
-; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: pextrb $1, %xmm1, %edx
; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
@@ -155,7 +155,7 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1
; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: pextrb $1, %xmm1, %edx
; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
@@ -179,7 +179,7 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pxor %xmm0, %xmm1
-; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: pextrb $1, %xmm1, %edx
; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
@@ -196,7 +196,7 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
-; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: pextrb $1, %xmm1, %edx
; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
@@ -220,7 +220,7 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: pextrb $1, %xmm1, %edx
; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
@@ -237,7 +237,7 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
-; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: pextrb $1, %xmm1, %edx
; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx
; X64-SSE-NEXT: # kill: def $al killed $al killed $eax