[llvm] r245503 - [DAGCombiner] Added SMAX/SMIN/UMAX/UMIN constant folding
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 14:11:58 PDT 2015
Author: rksimon
Date: Wed Aug 19 16:11:58 2015
New Revision: 245503
URL: http://llvm.org/viewvc/llvm-project?rev=245503&view=rev
Log:
[DAGCombiner] Added SMAX/SMIN/UMAX/UMIN constant folding
We still need to add constant folding of vector comparisons to fold the tests for targets that don't support the respective min/max nodes
I needed to update 2011-12-06-AVXVectorExtractCombine to load a vector instead of using a constant vector to prevent it folding
Differential Revision: http://reviews.llvm.org/D12118
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
llvm/trunk/test/CodeGen/X86/vec_minmax_sint.ll
llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Aug 19 16:11:58 2015
@@ -687,7 +687,7 @@ public:
SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
SDValue N3, SDValue N4, SDValue N5);
-
+
// Specialize again based on number of operands for nodes with a VTList
// rather than a single VT.
SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs);
@@ -1072,6 +1072,10 @@ public:
// target info.
switch (Opcode) {
case ISD::ADD:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::MUL:
case ISD::MULHU:
case ISD::MULHS:
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Aug 19 16:11:58 2015
@@ -245,6 +245,7 @@ namespace {
SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
@@ -1341,6 +1342,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
@@ -2623,6 +2628,30 @@ SDValue DAGCombiner::visitUDIVREM(SDNode
return SDValue();
}
+
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold operation with constant operands.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+
+ // canonicalize constant to RHS
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+ return SDValue();
+}
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Aug 19 16:11:58 2015
@@ -3182,6 +3182,10 @@ static std::pair<APInt, bool> FoldValue(
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
@@ -3356,6 +3360,10 @@ SDValue SelectionDAG::getNode(unsigned O
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -5273,7 +5281,7 @@ SelectionDAG::getMaskedGather(SDVTList V
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- MaskedGatherSDNode *N =
+ MaskedGatherSDNode *N =
new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
Ops, VTs, VT, MMO);
CSEMap.InsertNode(N, IP);
Modified: llvm/trunk/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll Wed Aug 19 16:11:58 2015
@@ -4,13 +4,14 @@
define void @test(<4 x i32>* nocapture %p) nounwind {
; CHECK-LABEL: test:
; CHECK: vpxor %xmm0, %xmm0, %xmm0
- ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
+ ; CHECK-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0
+ ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
; CHECK-NEXT: ret
- %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
- %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
- %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- store <4 x i32> %c, <4 x i32>* %p, align 1
+ %a = load <4 x i32>, <4 x i32>* %p, align 1
+ %b = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a, <4 x i32> zeroinitializer) nounwind
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+ %d = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x i32> %d, <4 x i32>* %p, align 1
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/vec_minmax_sint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_minmax_sint.ll?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_minmax_sint.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_minmax_sint.ll Wed Aug 19 16:11:58 2015
@@ -1807,20 +1807,17 @@ define <4 x i32> @max_gt_v4i32c() {
;
; SSE41-LABEL: max_gt_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -1850,40 +1847,20 @@ define <8 x i32> @max_gt_v8i32c() {
;
; SSE41-LABEL: max_gt_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_gt_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_gt_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_gt_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_gt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp sgt <8 x i32> %1, %2
@@ -1894,14 +1871,12 @@ define <8 x i32> @max_gt_v8i32c() {
define <8 x i16> @max_gt_v8i16c() {
; SSE-LABEL: max_gt_v8i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_gt_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -1913,32 +1888,14 @@ define <8 x i16> @max_gt_v8i16c() {
define <16 x i16> @max_gt_v16i16c() {
; SSE-LABEL: max_gt_v16i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
-; AVX1-LABEL: max_gt_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_gt_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_gt_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_gt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp sgt <16 x i16> %1, %2
@@ -1960,20 +1917,17 @@ define <16 x i8> @max_gt_v16i8c() {
;
; SSE41-LABEL: max_gt_v16i8c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE41-NEXT: pmaxsb {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v16i8c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE42-NEXT: pmaxsb {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpmaxsb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
@@ -2213,20 +2167,17 @@ define <4 x i32> @max_ge_v4i32c() {
;
; SSE41-LABEL: max_ge_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -2260,40 +2211,20 @@ define <8 x i32> @max_ge_v8i32c() {
;
; SSE41-LABEL: max_ge_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_ge_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_ge_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_ge_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_ge_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp sge <8 x i32> %1, %2
@@ -2304,14 +2235,12 @@ define <8 x i32> @max_ge_v8i32c() {
define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -2323,32 +2252,14 @@ define <8 x i16> @max_ge_v8i16c() {
define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0
-; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
-; AVX1-LABEL: max_ge_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_ge_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_ge_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_ge_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp sge <16 x i16> %1, %2
@@ -2372,20 +2283,17 @@ define <16 x i8> @max_ge_v16i8c() {
;
; SSE41-LABEL: max_ge_v16i8c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE41-NEXT: pmaxsb {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v16i8c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE42-NEXT: pmaxsb {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpmaxsb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
@@ -2601,20 +2509,17 @@ define <4 x i32> @min_lt_v4i32c() {
;
; SSE41-LABEL: min_lt_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -2644,40 +2549,20 @@ define <8 x i32> @min_lt_v8i32c() {
;
; SSE41-LABEL: min_lt_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_lt_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_lt_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_lt_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_lt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp slt <8 x i32> %1, %2
@@ -2688,14 +2573,12 @@ define <8 x i32> @min_lt_v8i32c() {
define <8 x i16> @min_lt_v8i16c() {
; SSE-LABEL: min_lt_v8i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -2707,32 +2590,14 @@ define <8 x i16> @min_lt_v8i16c() {
define <16 x i16> @min_lt_v16i16c() {
; SSE-LABEL: min_lt_v16i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: min_lt_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_lt_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_lt_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_lt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp slt <16 x i16> %1, %2
@@ -2754,20 +2619,17 @@ define <16 x i8> @min_lt_v16i8c() {
;
; SSE41-LABEL: min_lt_v16i8c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v16i8c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
@@ -3007,20 +2869,17 @@ define <4 x i32> @min_le_v4i32c() {
;
; SSE41-LABEL: min_le_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -3054,40 +2913,20 @@ define <8 x i32> @min_le_v8i32c() {
;
; SSE41-LABEL: min_le_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_le_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_le_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_le_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_le_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp sle <8 x i32> %1, %2
@@ -3098,14 +2937,12 @@ define <8 x i32> @min_le_v8i32c() {
define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -3117,32 +2954,14 @@ define <8 x i16> @min_le_v8i16c() {
define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
-; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: min_le_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_le_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_le_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_le_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp sle <16 x i16> %1, %2
@@ -3166,20 +2985,17 @@ define <16 x i8> @min_le_v16i8c() {
;
; SSE41-LABEL: min_le_v16i8c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v16i8c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll?rev=245503&r1=245502&r2=245503&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll Wed Aug 19 16:11:58 2015
@@ -1942,20 +1942,17 @@ define <4 x i32> @max_gt_v4i32c() {
;
; SSE41-LABEL: max_gt_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -1983,40 +1980,20 @@ define <8 x i32> @max_gt_v8i32c() {
;
; SSE41-LABEL: max_gt_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_gt_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_gt_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_gt_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_gt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp ugt <8 x i32> %1, %2
@@ -2037,20 +2014,17 @@ define <8 x i16> @max_gt_v8i16c() {
;
; SSE41-LABEL: max_gt_v8i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v8i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -2078,40 +2052,20 @@ define <16 x i16> @max_gt_v16i16c() {
;
; SSE41-LABEL: max_gt_v16i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v16i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_gt_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_gt_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_gt_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_gt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp ugt <16 x i16> %1, %2
@@ -2122,14 +2076,12 @@ define <16 x i16> @max_gt_v16i16c() {
define <16 x i8> @max_gt_v16i8c() {
; SSE-LABEL: max_gt_v16i8c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
@@ -2364,20 +2316,17 @@ define <4 x i32> @max_ge_v4i32c() {
;
; SSE41-LABEL: max_ge_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -2407,40 +2356,20 @@ define <8 x i32> @max_ge_v8i32c() {
;
; SSE41-LABEL: max_ge_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_ge_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_ge_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_ge_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_ge_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp uge <8 x i32> %1, %2
@@ -2464,20 +2393,17 @@ define <8 x i16> @max_ge_v8i16c() {
;
; SSE41-LABEL: max_ge_v8i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v8i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -2510,40 +2436,20 @@ define <16 x i16> @max_ge_v16i16c() {
;
; SSE41-LABEL: max_ge_v16i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v16i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: max_ge_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: max_ge_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: max_ge_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: max_ge_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp uge <16 x i16> %1, %2
@@ -2554,14 +2460,12 @@ define <16 x i16> @max_ge_v16i16c() {
define <16 x i8> @max_ge_v16i8c() {
; SSE-LABEL: max_ge_v16i8c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
@@ -2773,20 +2677,17 @@ define <4 x i32> @min_lt_v4i32c() {
;
; SSE41-LABEL: min_lt_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -2814,40 +2715,20 @@ define <8 x i32> @min_lt_v8i32c() {
;
; SSE41-LABEL: min_lt_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_lt_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_lt_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_lt_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_lt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp ult <8 x i32> %1, %2
@@ -2870,20 +2751,17 @@ define <8 x i16> @min_lt_v8i16c() {
;
; SSE41-LABEL: min_lt_v8i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v8i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
@@ -2911,40 +2789,20 @@ define <16 x i16> @min_lt_v16i16c() {
;
; SSE41-LABEL: min_lt_v16i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v16i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_lt_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_lt_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_lt_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_lt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
%3 = icmp ult <16 x i16> %1, %2
@@ -2955,14 +2813,12 @@ define <16 x i16> @min_lt_v16i16c() {
define <16 x i8> @min_lt_v16i8c() {
; SSE-LABEL: min_lt_v16i8c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
@@ -3197,20 +3053,17 @@ define <4 x i32> @min_le_v4i32c() {
;
; SSE41-LABEL: min_le_v4i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v4i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
-; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
%1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
%2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
@@ -3240,40 +3093,20 @@ define <8 x i32> @min_le_v8i32c() {
;
; SSE41-LABEL: min_le_v8i32c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v8i32c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_le_v8i32c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
-; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
-; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_le_v8i32c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_le_v8i32c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
-; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_le_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
%2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
%3 = icmp ule <8 x i32> %1, %2
@@ -3297,20 +3130,17 @@ define <8 x i16> @min_le_v8i16c() {
;
; SSE41-LABEL: min_le_v8i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v8i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
-; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
%1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
%2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
@@ -3343,40 +3173,20 @@ define <16 x i16> @min_le_v16i16c() {
;
; SSE41-LABEL: min_le_v16i16c:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v16i16c:
; SSE42: # BB#0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
-; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
+; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: min_le_v16i16c:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
-; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
-; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: min_le_v16i16c:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: min_le_v16i16c:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
-; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: min_le_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
%2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
%3 = icmp ule <16 x i16> %1, %2
@@ -3387,14 +3197,12 @@ define <16 x i16> @min_le_v16i16c() {
define <16 x i8> @min_le_v16i8c() {
; SSE-LABEL: min_le_v16i8c:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
%1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
%2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
More information about the llvm-commits mailing list