[llvm] r321070 - [X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 19 04:02:40 PST 2017
Author: rksimon
Date: Tue Dec 19 04:02:40 2017
New Revision: 321070
URL: http://llvm.org/viewvc/llvm-project?rev=321070&view=rev
Log:
[X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
This extends D39729, which performed this for vXi16; with the same bit flipping to handle the SMAX/SMIN/UMAX cases, vXi8 horizontal reductions can be performed via UMIN as well.
This makes use of the fact that by performing a pair-wise i8 SHUFFLE/UMIN before PHMINPOSUW, we not only get the UMIN of each pair but also zero-extend the upper bits, ready for v8i16.
Differential Revision: https://reviews.llvm.org/D41294
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321070&r1=321069&r2=321070&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 19 04:02:40 2017
@@ -30482,7 +30482,8 @@ static SDValue createPSADBW(SelectionDAG
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
-// Attempt to replace an min/max v8i16 horizontal reduction with PHMINPOSUW.
+// Attempt to replace an min/max v8i16/v16i8 horizontal reduction with
+// PHMINPOSUW.
static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE41.
@@ -30490,7 +30491,7 @@ static SDValue combineHorizontalMinMaxRe
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
- if (ExtractVT != MVT::i16)
+ if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
@@ -30502,7 +30503,7 @@ static SDValue combineHorizontalMinMaxRe
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getScalarType();
- if (SrcSVT != MVT::i16 || (SrcVT.getSizeInBits() % 128) != 0)
+ if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
return SDValue();
SDLoc DL(Extract);
@@ -30518,22 +30519,39 @@ static SDValue combineHorizontalMinMaxRe
SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);
MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
}
- assert(SrcVT == MVT::v8i16 && "Unexpected value type");
+ assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
+ (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
+ "Unexpected value type");
// PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask
// to flip the value accordingly.
SDValue Mask;
+ unsigned MaskEltsBits = ExtractVT.getSizeInBits();
if (BinOp == ISD::SMAX)
- Mask = DAG.getConstant(APInt::getSignedMaxValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::SMIN)
- Mask = DAG.getConstant(APInt::getSignedMinValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)
- Mask = DAG.getConstant(APInt::getAllOnesValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
- MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, SrcVT, MinPos);
+ // For v16i8 cases we need to perform UMIN on pairs of byte elements,
+ // shuffling each upper element down and insert zeros. This means that the
+ // v16i8 UMIN will leave the upper element as zero, performing zero-extension
+ // ready for the PHMINPOS.
+ if (ExtractVT == MVT::i8) {
+ SDValue Upper = DAG.getVectorShuffle(
+ SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+ {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
+ MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
+ }
+
+ // Perform the PHMINPOS on a v8i16 vector,
+ MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
+ MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
+ MinPos = DAG.getBitcast(SrcVT, MinPos);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
@@ -30849,7 +30867,7 @@ static SDValue combineExtractVectorElt(S
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;
- // Attempt to replace min/max v8i16 reductions with PHMINPOSUW.
+ // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
return MinMax;
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll?rev=321070&r1=321069&r2=321070&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll Tue Dec 19 04:02:40 2017
@@ -309,30 +309,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
@@ -371,30 +366,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
@@ -906,16 +896,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -924,14 +911,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -940,15 +925,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -994,16 +977,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1012,14 +992,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1028,15 +1006,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1045,15 +1021,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
@@ -1743,16 +1717,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1764,14 +1735,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1781,15 +1750,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -1847,16 +1814,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1868,14 +1832,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1885,15 +1847,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1902,17 +1862,15 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll?rev=321070&r1=321069&r2=321070&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll Tue Dec 19 04:02:40 2017
@@ -311,30 +311,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
@@ -373,30 +368,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
@@ -910,16 +900,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -928,14 +915,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -944,15 +929,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -998,16 +981,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1016,14 +996,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1032,15 +1010,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1049,15 +1025,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
@@ -1745,16 +1719,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1766,14 +1737,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1783,15 +1752,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -1849,16 +1816,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1870,14 +1834,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1887,15 +1849,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1904,17 +1864,15 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll?rev=321070&r1=321069&r2=321070&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll Tue Dec 19 04:02:40 2017
@@ -362,30 +362,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
@@ -408,30 +403,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
@@ -1031,16 +1021,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1049,14 +1036,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1065,15 +1050,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -1099,16 +1082,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1117,14 +1097,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1133,15 +1111,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1150,15 +1126,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
@@ -1992,16 +1966,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-SSE42-NEXT: pmaxub %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -2013,14 +1984,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -2030,15 +1999,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -2068,16 +2035,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-SSE42-NEXT: pmaxub %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -2089,14 +2053,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -2106,15 +2068,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -2123,17 +2083,15 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll?rev=321070&r1=321069&r2=321070&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll Tue Dec 19 04:02:40 2017
@@ -352,30 +352,19 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
@@ -398,30 +387,19 @@ define i8 @test_reduce_v16i8(<16 x i8> %
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
@@ -1004,16 +982,10 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1022,14 +994,9 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1038,15 +1005,10 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -1072,16 +1034,10 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1090,14 +1046,9 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1106,15 +1057,10 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1123,15 +1069,10 @@ define i8 @test_reduce_v32i8(<32 x i8> %
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
@@ -1942,16 +1883,10 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-SSE42-NEXT: pminub %xmm3, %xmm1
; X86-SSE42-NEXT: pminub %xmm2, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1963,14 +1898,9 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1980,15 +1910,10 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -2018,16 +1943,10 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-SSE42-NEXT: pminub %xmm3, %xmm1
; X64-SSE42-NEXT: pminub %xmm2, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -2039,14 +1958,9 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -2056,15 +1970,10 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -2073,17 +1982,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
More information about the llvm-commits
mailing list