[llvm] r318917 - [X86][SSE] Use (V)PHMINPOSUW for vXi16 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 23 05:50:27 PST 2017
Author: rksimon
Date: Thu Nov 23 05:50:27 2017
New Revision: 318917
URL: http://llvm.org/viewvc/llvm-project?rev=318917&view=rev
Log:
[X86][SSE] Use (V)PHMINPOSUW for vXi16 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
(V)PHMINPOSUW determines the UMIN element of a v8i16 input; with suitable bit flipping it can also be used for the SMAX/SMIN/UMAX cases.
This patch matches vXi16 SMAX/SMIN/UMAX/UMIN horizontal reductions and reduces the input down to a v8i16 vector before calling (V)PHMINPOSUW.
A later patch will use this for v16i8 reductions as well (PR32841).
Differential Revision: https://reviews.llvm.org/D39729
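To make the bit flipping concrete, here is a minimal scalar sketch (plain C++, not part of the patch) of why XOR with 0x7FFF, 0x8000 and 0xFFFF turns the SMAX, SMIN and UMAX problems, respectively, into a UMIN one:

#include <cassert>
#include <cstdint>

// Scalar model: each reduction becomes "flip, take unsigned min, flip back".
// SMAX: x ^ 0x7FFF reverses the signed order when compared unsigned.
// SMIN: x ^ 0x8000 converts the signed order into the unsigned order.
// UMAX: x ^ 0xFFFF reverses the unsigned order.
static uint16_t umin16(uint16_t a, uint16_t b) { return a < b ? a : b; }

static int16_t smax_via_umin(int16_t a, int16_t b) {
  uint16_t fa = (uint16_t)a ^ 0x7FFF, fb = (uint16_t)b ^ 0x7FFF;
  return (int16_t)(umin16(fa, fb) ^ 0x7FFF);
}

static int16_t smin_via_umin(int16_t a, int16_t b) {
  uint16_t fa = (uint16_t)a ^ 0x8000, fb = (uint16_t)b ^ 0x8000;
  return (int16_t)(umin16(fa, fb) ^ 0x8000);
}

static uint16_t umax_via_umin(uint16_t a, uint16_t b) {
  return (uint16_t)(umin16(a ^ 0xFFFF, b ^ 0xFFFF) ^ 0xFFFF);
}

int main() {
  assert(smax_via_umin(-32768, 32767) == 32767);
  assert(smax_via_umin(-5, -7) == -5);
  assert(smin_via_umin(-32768, 32767) == -32768);
  assert(umax_via_umin(0, 0xFFFF) == 0xFFFF);
  return 0;
}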
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 23 05:50:27 2017
@@ -25073,6 +25073,7 @@ const char *X86TargetLowering::getTarget
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
+ case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
@@ -30326,6 +30327,66 @@ static SDValue createPSADBW(SelectionDAG
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
+// Attempt to replace a min/max v8i16 horizontal reduction with PHMINPOSUW.
+static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Bail without SSE41.
+ if (!Subtarget.hasSSE41())
+ return SDValue();
+
+ EVT ExtractVT = Extract->getValueType(0);
+ if (ExtractVT != MVT::i16)
+ return SDValue();
+
+ // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
+ unsigned BinOp;
+ SDValue Src = matchBinOpReduction(
+ Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
+ if (!Src)
+ return SDValue();
+
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ if (SrcSVT != MVT::i16 || (SrcVT.getSizeInBits() % 128) != 0)
+ return SDValue();
+
+ SDLoc DL(Extract);
+ SDValue MinPos = Src;
+
+ // First, reduce the source down to 128-bit, applying BinOp to lo/hi.
+ while (SrcVT.getSizeInBits() > 128) {
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ unsigned NumSubElts = NumElts / 2;
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts);
+ unsigned SubSizeInBits = SrcVT.getSizeInBits();
+ SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits);
+ SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);
+ MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
+ }
+ assert(SrcVT == MVT::v8i16 && "Unexpected value type");
+
+ // PHMINPOSUW applies to UMIN(v8i16); for SMAX/SMIN/UMAX we must apply a mask
+ // to flip the values accordingly.
+ SDValue Mask;
+ if (BinOp == ISD::SMAX)
+ Mask = DAG.getConstant(APInt::getSignedMaxValue(16), DL, SrcVT);
+ else if (BinOp == ISD::SMIN)
+ Mask = DAG.getConstant(APInt::getSignedMinValue(16), DL, SrcVT);
+ else if (BinOp == ISD::UMAX)
+ Mask = DAG.getConstant(APInt::getAllOnesValue(16), DL, SrcVT);
+
+ if (Mask)
+ MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
+
+ MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, SrcVT, MinPos);
+
+ if (Mask)
+ MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,
+ DAG.getIntPtrConstant(0, DL));
+}
+
// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
static SDValue combineHorizontalPredicateResult(SDNode *Extract,
SelectionDAG &DAG,
@@ -30633,6 +30694,10 @@ static SDValue combineExtractVectorElt(S
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;
+ // Attempt to replace min/max v8i16 reductions with PHMINPOSUW.
+ if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
+ return MinMax;
+
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (SrcVT != MVT::v4i32)
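As a cross-check of the combine above, here is a scalar model (a hypothetical helper, not the patch's code) of what combineHorizontalMinMaxResult arranges for a UMAX reduction over N x i16: halve the vector with the binary op until 8 lanes remain, then flip, take the unsigned minimum (the value PHMINPOSUW leaves in lane 0), and flip back:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical scalar model for a UMAX reduction over N x i16
// (N a multiple of 8).
static uint16_t reduce_umax_model(std::vector<uint16_t> v) {
  while (v.size() > 8) {                 // reduce down to "128 bits"
    size_t half = v.size() / 2;
    for (size_t i = 0; i != half; ++i)   // BinOp(lo, hi)
      v[i] = std::max(v[i], v[i + half]);
    v.resize(half);
  }
  for (uint16_t &e : v)                  // flip: UMAX -> UMIN
    e = ~e;
  uint16_t m = *std::min_element(v.begin(), v.end()); // PHMINPOSUW's min
  return (uint16_t)~m;                   // flip back
}

int main() {
  std::vector<uint16_t> v = {1, 9, 4, 65535, 7, 2, 8, 3,
                             0, 5, 6, 11,    12, 13, 14, 15};
  assert(reduce_umax_model(v) == 65535);
  return 0;
}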
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Nov 23 05:50:27 2017
@@ -335,6 +335,9 @@ namespace llvm {
// Vector integer comparisons, the result is in a mask vector.
PCMPEQM, PCMPGTM,
+ // v8i16 horizontal minimum and position.
+ PHMINPOS,
+
MULTISHIFT,
/// Vector comparison generating mask bits for fp and
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Thu Nov 23 05:50:27 2017
@@ -182,6 +182,9 @@ def X86cmpmu : SDNode<"X86ISD::CMPMU"
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>;
+def X86phminpos: SDNode<"X86ISD::PHMINPOS",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>;
+
def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisInt<0>,
SDTCisInt<1>]>;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Nov 23 05:50:27 2017
@@ -6188,22 +6188,20 @@ let Defs = [EFLAGS], Predicates = [HasPO
Sched<[WriteFAddLd]>, XS;
}
-
-
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, PatFrag ld_frag,
+ SDNode OpNode, PatFrag ld_frag,
X86FoldableSchedWrite Sched> {
def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
Sched<[Sched]>;
def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId128 (bitconvert (ld_frag addr:$src))))]>,
+ (v8i16 (OpNode (v8i16 (bitconvert (ld_frag addr:$src))))))]>,
Sched<[Sched.Folded]>;
}
@@ -6211,10 +6209,10 @@ multiclass SS41I_unop_rm_int_v16<bits<8>
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
- int_x86_sse41_phminposuw, loadv2i64,
+ X86phminpos, loadv2i64,
WriteVecIMul>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
- int_x86_sse41_phminposuw, memopv2i64,
+ X86phminpos, memopv2i64,
WriteVecIMul>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Nov 23 05:50:27 2017
@@ -1679,6 +1679,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse41_phminposuw, INTR_TYPE_1OP, X86ISD::PHMINPOS, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse41_round_pd, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(sse41_round_ps, ROUNDP, X86ISD::VRNDSCALE, 0),
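For reference, the instruction that this table entry now routes through X86ISD::PHMINPOS is also reachable directly from C++ via the SSE4.1 intrinsic; a minimal usage sketch (assumes an SSE4.1-capable build, e.g. compiled with -msse4.1):

#include <smmintrin.h> // SSE4.1
#include <cassert>
#include <cstdint>

int main() {
  __m128i v = _mm_setr_epi16(40, 10, 30, 90, 80, 20, 70, 60);
  // PHMINPOSUW: lane 0 of the result holds the unsigned minimum word,
  // bits [18:16] (the low bits of lane 1) hold its index, the rest is zeroed.
  __m128i r = _mm_minpos_epu16(v);
  uint16_t min = (uint16_t)_mm_extract_epi16(r, 0);
  uint16_t idx = (uint16_t)_mm_extract_epi16(r, 1);
  assert(min == 10 && idx == 1);
  return 0;
}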
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll Thu Nov 23 05:50:27 2017
@@ -196,52 +196,68 @@ define i32 @test_reduce_v4i32(<4 x i32>
}
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v8i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v8i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v8i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## BB#0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v8i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v8i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v8i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX-NEXT: retq
@@ -719,30 +735,39 @@ define i32 @test_reduce_v8i32(<8 x i32>
}
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v16i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v16i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v16i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## BB#0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -751,42 +776,49 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v16i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v16i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v16i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## BB#0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -795,13 +827,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -810,13 +840,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
@@ -1511,21 +1539,34 @@ define i32 @test_reduce_v16i32(<16 x i32
}
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v32i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pmaxsw %xmm3, %xmm1
-; X86-SSE-NEXT: pmaxsw %xmm2, %xmm0
-; X86-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v32i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1
+; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
+; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v32i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1
+; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0
+; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1: ## BB#0:
@@ -1534,12 +1575,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -1549,33 +1588,44 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v32i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pmaxsw %xmm3, %xmm1
-; X64-SSE-NEXT: pmaxsw %xmm2, %xmm0
-; X64-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v32i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1
+; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
+; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v32i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1
+; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0
+; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1: ## BB#0:
@@ -1584,12 +1634,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -1599,13 +1647,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1614,15 +1660,13 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll Thu Nov 23 05:50:27 2017
@@ -198,52 +198,68 @@ define i32 @test_reduce_v4i32(<4 x i32>
}
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v8i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v8i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v8i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## BB#0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v8i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v8i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v8i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX-NEXT: retq
@@ -723,30 +739,39 @@ define i32 @test_reduce_v8i32(<8 x i32>
}
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v16i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v16i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v16i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## BB#0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -755,42 +780,49 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v16i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v16i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v16i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## BB#0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -799,13 +831,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -814,13 +844,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
@@ -1513,21 +1541,34 @@ define i32 @test_reduce_v16i32(<16 x i32
}
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
-; X86-SSE-LABEL: test_reduce_v32i16:
-; X86-SSE: ## BB#0:
-; X86-SSE-NEXT: pminsw %xmm3, %xmm1
-; X86-SSE-NEXT: pminsw %xmm2, %xmm0
-; X86-SSE-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psrld $16, %xmm1
-; X86-SSE-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X86-SSE-NEXT: retl
+; X86-SSE2-LABEL: test_reduce_v32i16:
+; X86-SSE2: ## BB#0:
+; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
+; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
+; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrld $16, %xmm1
+; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE42-LABEL: test_reduce_v32i16:
+; X86-SSE42: ## BB#0:
+; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
+; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
+; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1: ## BB#0:
@@ -1536,12 +1577,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -1551,33 +1590,44 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
-; X64-SSE-LABEL: test_reduce_v32i16:
-; X64-SSE: ## BB#0:
-; X64-SSE-NEXT: pminsw %xmm3, %xmm1
-; X64-SSE-NEXT: pminsw %xmm2, %xmm0
-; X64-SSE-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm1
-; X64-SSE-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; X64-SSE-NEXT: retq
+; X64-SSE2-LABEL: test_reduce_v32i16:
+; X64-SSE2: ## BB#0:
+; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
+; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
+; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT: psrld $16, %xmm1
+; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
+; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE42-LABEL: test_reduce_v32i16:
+; X64-SSE42: ## BB#0:
+; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
+; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
+; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1: ## BB#0:
@@ -1586,12 +1636,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -1601,13 +1649,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1616,15 +1662,13 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll Thu Nov 23 05:50:27 2017
@@ -259,25 +259,20 @@ define i16 @test_reduce_v8i16(<8 x i16>
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## BB#0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## BB#0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX-NEXT: retl
@@ -318,25 +313,20 @@ define i16 @test_reduce_v8i16(<8 x i16>
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## BB#0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX-NEXT: retq
@@ -879,14 +869,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## BB#0:
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
@@ -894,12 +881,10 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX1: ## BB#0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -908,13 +893,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
@@ -965,14 +948,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## BB#0:
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
@@ -980,12 +960,10 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX1: ## BB#0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -994,13 +972,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1009,13 +985,11 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
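
The pcmpeqd/pxor pair framing the vphminposuw in the checks above is the bit-flip trick from the commit message: PHMINPOSUW only computes an unsigned minimum, but ~umin(~x) == umax(x). As a hedged illustration (not part of the commit; the helper name is hypothetical, C with SSE4.1 intrinsics, assumes -msse4.1):

    #include <immintrin.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the pcmpeqd/pxor/phminposuw/pxor
       sequence checked above: horizontal UMAX of a v8i16 vector. */
    static uint16_t reduce_umax_v8i16(__m128i v) {
      __m128i ones = _mm_cmpeq_epi32(v, v);     /* all-ones, as pcmpeqd does */
      __m128i inv  = _mm_xor_si128(v, ones);    /* ~x turns UMAX into UMIN */
      __m128i umin = _mm_minpos_epu16(inv);     /* phminposuw: min in lane 0 */
      __m128i res  = _mm_xor_si128(umin, ones); /* flip back: ~umin(~x) == umax(x) */
      return (uint16_t)_mm_extract_epi16(res, 0);
    }
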
@@ -1821,14 +1795,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-SSE42-NEXT: pmaxuw %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
@@ -1839,12 +1810,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -1854,13 +1823,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
@@ -1929,14 +1896,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-SSE42-NEXT: pmaxuw %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
@@ -1947,12 +1911,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -1962,13 +1924,11 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1977,15 +1937,13 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
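
For the 256-bit and 512-bit tests above, the combine first folds the upper and lower halves together with the native vpmaxuw until a single 128-bit vector remains, and only then applies the flipped PHMINPOSUW step. A minimal AVX2 sketch of one such halving (not from the commit; hypothetical name, assumes -mavx2):

    #include <immintrin.h>
    #include <stdint.h>

    /* Hypothetical: v16i16 UMAX, matching the X64-AVX2 checks --
       one vpmaxuw half-fold, then the bit-flipped phminposuw step. */
    static uint16_t reduce_umax_v16i16(__m256i v) {
      __m128i lo   = _mm256_castsi256_si128(v);
      __m128i hi   = _mm256_extracti128_si256(v, 1); /* vextracti128 $1 */
      __m128i m    = _mm_max_epu16(lo, hi);          /* vpmaxuw lo, hi */
      __m128i ones = _mm_cmpeq_epi32(m, m);
      __m128i inv  = _mm_xor_si128(m, ones);
      __m128i res  = _mm_xor_si128(_mm_minpos_epu16(inv), ones);
      return (uint16_t)_mm_extract_epi16(res, 0);
    }
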
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll?rev=318917&r1=318916&r2=318917&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll Thu Nov 23 05:50:27 2017
@@ -261,25 +261,14 @@ define i16 @test_reduce_v8i16(<8 x i16>
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## BB#0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## BB#0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX-NEXT: retl
@@ -320,25 +309,14 @@ define i16 @test_reduce_v8i16(<8 x i16>
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## BB#0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX-NEXT: retq
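
UMIN is the one case that needs no fixup: PHMINPOSUW already computes the unsigned v8i16 minimum, so the whole pshufd/pminuw/psrld chain collapses to a single instruction plus the scalar move, exactly as checked above. A one-line sketch (hypothetical name, assumes -msse4.1):

    #include <immintrin.h>
    #include <stdint.h>

    /* Hypothetical: v8i16 UMIN -- phminposuw leaves the minimum value in
       lane 0 (and its index in lane 1, which the reduction ignores). */
    static uint16_t reduce_umin_v8i16(__m128i v) {
      return (uint16_t)_mm_extract_epi16(_mm_minpos_epu16(v), 0);
    }
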
@@ -885,14 +863,8 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## BB#0:
; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
@@ -900,12 +872,7 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX1: ## BB#0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -914,13 +881,8 @@ define i16 @test_reduce_v16i16(<16 x i16
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
@@ -971,14 +933,8 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## BB#0:
; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
@@ -986,12 +942,7 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX1: ## BB#0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -1000,13 +951,8 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1015,13 +961,8 @@ define i16 @test_reduce_v16i16(<16 x i16
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
@@ -1825,14 +1766,8 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-SSE42-NEXT: pminuw %xmm3, %xmm1
; X86-SSE42-NEXT: pminuw %xmm2, %xmm0
; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X86-SSE42-NEXT: movd %xmm1, %eax
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-SSE42-NEXT: retl
;
@@ -1843,12 +1778,7 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX1-NEXT: vzeroupper
@@ -1858,13 +1788,8 @@ define i16 @test_reduce_v32i16(<32 x i16
; X86-AVX2: ## BB#0:
; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X86-AVX2-NEXT: vzeroupper
@@ -1933,14 +1858,8 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-SSE42-NEXT: pminuw %xmm3, %xmm1
; X64-SSE42-NEXT: pminuw %xmm2, %xmm0
; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
-; X64-SSE42-NEXT: movd %xmm1, %eax
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-SSE42-NEXT: retq
;
@@ -1951,12 +1870,7 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX1-NEXT: vzeroupper
@@ -1966,13 +1880,8 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX2: ## BB#0:
; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX2-NEXT: vzeroupper
@@ -1981,15 +1890,10 @@ define i16 @test_reduce_v32i16(<32 x i16
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; X64-AVX512-NEXT: vzeroupper
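
The final X64-AVX512 hunk also shows the narrowing loop running twice, zmm to ymm to xmm, rather than staying at 512 bits. A hedged AVX512 sketch of that shape (not from the commit; hypothetical name, assumes -mavx512f plus the SSE4.1/AVX2 ops above):

    #include <immintrin.h>
    #include <stdint.h>

    /* Hypothetical: v32i16 UMIN following the X64-AVX512 checks --
       fold zmm halves, then ymm halves, then one phminposuw. */
    static uint16_t reduce_umin_v32i16(__m512i v) {
      __m256i lo256 = _mm512_castsi512_si256(v);
      __m256i hi256 = _mm512_extracti64x4_epi64(v, 1); /* vextracti64x4 $1 */
      __m256i m256  = _mm256_min_epu16(lo256, hi256);  /* vpminuw ymm */
      __m128i lo128 = _mm256_castsi256_si128(m256);
      __m128i hi128 = _mm256_extracti128_si256(m256, 1);
      __m128i m128  = _mm_min_epu16(lo128, hi128);     /* vpminuw xmm */
      return (uint16_t)_mm_extract_epi16(_mm_minpos_epu16(m128), 0);
    }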