[llvm] [X86] LowerMINMAX - use valuetracking to attempt to find a smaller type that can efficiently lower min/max ops (PR #174294)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 3 13:56:07 PST 2026


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/174294

We currently use the generic expansions to custom lower integer min/max instructions, but if the operands have sufficient known leading bits, SSE/AVX is always better off handling the operation directly with smaller types.

vXi64 cmp/min/max is particularly weak, and the narrower the type, the better legality we have - this approach seems to work well for x86, but I'm not sure if it's valid enough to try generically in this manner.
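
For example, the umax_v2i64_as_umax_v2i32 test in llvm/test/CodeGen/X86/vector-umax-range.ll (updated below) exercises exactly this - roughly the following IR, reconstructed here from the test name and the shift amounts in the check lines, so the exact body is an approximation:

  define <2 x i64> @umax_v2i64_as_umax_v2i32(<2 x i64> %a0, <2 x i64> %a1) {
    %x = lshr <2 x i64> %a0, <i64 33, i64 33>   ; >= 33 known leading zeros per element
    %y = lshr <2 x i64> %a1, <i64 43, i64 43>   ; >= 43 known leading zeros per element
    %r = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %x, <2 x i64> %y)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)

Since the upper 32 bits of every element are known zero, the v2i64 umax can be performed as a v4i32 umax on the bitcast operands, which the SSE42/AVX targets in the test now lower to a single pmaxud/vpmaxud instead of a pcmpgtq + blend sequence.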

However, I added the signed/unsigned flip fold to expandIntMINMAX to further improve SSE2 codegen, similar to what we already attempt in DAGCombiner (which, with a bit more work, we might now be able to remove).
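
As a sketch of what the flip buys on SSE2 (IR reconstructed from the umin_v2i64_as_umin_v2i16 test below, so the exact body is assumed): after the shifts, every i16 lane of the narrowed operands has a clear sign bit, so the unsigned min, for which SSE2 has no i16 instruction, can be treated as a signed min:

  define <2 x i64> @umin_v2i64_as_umin_v2i16(<2 x i64> %a0, <2 x i64> %a1) {
    %x = lshr <2 x i64> %a0, <i64 49, i64 49>   ; each element fits in 15 bits
    %y = lshr <2 x i64> %a1, <i64 63, i64 63>   ; each element fits in 1 bit
    %r = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %x, <2 x i64> %y)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)

Here the v2i64 umin is first narrowed to a v8i16 umin; since SSE2 only has pminsw/pmaxsw at i16, the flip fold turns it into smin, and the SSE2 check lines below go from a pcmpgtd/pand/pandn/por sequence to a single pminsw.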

All that's missing is better ComputeNumSignBits handling for vXi64 ashr expansion, which still misses a lot of cases when the shift is split across vXi32 types and shuffles.
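
For the signed side, where the narrowing relies on ComputeNumSignBits rather than known leading zeros, the smax_v4i32_as_smax_v4i16 test in vector-smax-range.ll (updated below) is the simple case that already works - roughly, assuming the body matches the test name and shifts:

  define <4 x i32> @smax_v4i32_as_smax_v4i16(<4 x i32> %a0, <4 x i32> %a1) {
    %x = ashr <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>   ; >= 17 sign bits per element
    %y = ashr <4 x i32> %a1, <i32 16, i32 16, i32 16, i32 16>
    %r = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
    ret <4 x i32> %r
  }
  declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

With at least 17 sign bits per element this becomes a v8i16 smax (pmaxsw) even on SSE2; the equivalent vXi64 ashr cases are the ones ComputeNumSignBits still loses track of once the shift has been expanded into vXi32 ops and shuffles.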

Fixes #174169

From ef192ca13a1b7e6d9fe4f0a6a9c9c161ce3d7193 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Sat, 3 Jan 2026 21:54:00 +0000
Subject: [PATCH] [X86] LowerMINMAX - use valuetracking to attempt to find a
 smaller type that can efficiently lower min/max ops

We currently use the generic expansions to custom lower integer min/max instructions, but if the operands have sufficient known leading bits, SSE/AVX is always better off handling the operation directly with smaller types.

vXi64 cmp/min/max is particularly weak, and the narrower the type, the better legality we have - this approach seems to work well for x86, but I'm not sure if it's valid enough to try generically.

However, I added the signed/unsigned flip fold to expandIntMINMAX to further improve SSE2 codegen, similar to what we already attempt in DAGCombiner (which, with a bit more work, we might now be able to remove).

All that's missing is better ComputeNumSignBits handling for vXi64 ashr expansion, which still misses a lot of cases when the shift is split across vXi32 types and shuffles.

Fixes #174169
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 22 +++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 32 +++++++
 llvm/test/CodeGen/X86/vector-pcmp.ll          | 26 ++----
 llvm/test/CodeGen/X86/vector-smax-range.ll    | 12 +--
 llvm/test/CodeGen/X86/vector-smin-range.ll    | 12 +--
 llvm/test/CodeGen/X86/vector-umax-range.ll    | 91 +++++-------------
 llvm/test/CodeGen/X86/vector-umin-range.ll    | 93 ++++++-------------
 7 files changed, 118 insertions(+), 170 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e3300000fa6f4..69c3455573918 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10799,6 +10799,28 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
   unsigned Opcode = Node->getOpcode();
   SDLoc DL(Node);
 
+  // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
+  unsigned AltOpcode;
+  switch (Opcode) {
+  case ISD::SMIN:
+    AltOpcode = ISD::UMIN;
+    break;
+  case ISD::SMAX:
+    AltOpcode = ISD::UMAX;
+    break;
+  case ISD::UMIN:
+    AltOpcode = ISD::SMIN;
+    break;
+  case ISD::UMAX:
+    AltOpcode = ISD::SMAX;
+    break;
+  default:
+    llvm_unreachable("Unknown MINMAX opcode");
+  }
+  if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
+      DAG.SignBitIsZero(Op1))
+    return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
+
   // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
   if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
       getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fed4df707400f..602b026def45b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29546,6 +29546,10 @@ static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
 static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
+  unsigned SizeInBits = VT.getSizeInBits();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+  bool IsMax = Op.getOpcode() == ISD::SMAX || Op.getOpcode() == ISD::UMAX;
+  bool IsSigned = Op.getOpcode() == ISD::SMAX || Op.getOpcode() == ISD::SMIN;
   SDLoc DL(Op);
 
   // For AVX1 cases, split to use legal ops.
@@ -29555,6 +29559,34 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
   if (VT == MVT::v32i16 || VT == MVT::v64i8)
     return splitVectorIntBinary(Op, DAG, DL);
 
+  // See if the vector elements have sufficient leading bits to allow a
+  // smaller minmax opcode to be used.
+  if (VT.isVector() && EltSizeInBits > 8) {
+    SDValue N0 = Op.getOperand(0);
+    SDValue N1 = Op.getOperand(1);
+    unsigned CLS = DAG.ComputeNumSignBits(N0);
+    if (CLS != 1)
+      CLS = std::min(CLS, DAG.ComputeNumSignBits(N1));
+    unsigned CLZ = DAG.computeKnownBits(N0).countMinLeadingZeros();
+    if (CLZ != 0)
+      CLZ = std::min(CLZ, DAG.computeKnownBits(N1).countMinLeadingZeros());
+    for (unsigned Bits = 8; Bits < EltSizeInBits; Bits += Bits) {
+      std::optional<unsigned> Opcode;
+      if (CLZ >= (EltSizeInBits - Bits)) {
+        Opcode = IsMax ? ISD::UMAX : ISD::UMIN;
+      } else if ((IsSigned ? CLS : CLZ) > (EltSizeInBits - Bits)) {
+        Opcode = IsMax ? ISD::SMAX : ISD::SMIN;
+      }
+      if (Opcode.has_value()) {
+        MVT ReducedSVT = MVT::getIntegerVT(Bits);
+        MVT ReducedVT = MVT::getVectorVT(ReducedSVT, SizeInBits / Bits);
+        return DAG.getBitcast(VT, DAG.getNode(*Opcode, DL, ReducedVT,
+                                              DAG.getBitcast(ReducedVT, N0),
+                                              DAG.getBitcast(ReducedVT, N1)));
+      }
+    }
+  }
+
   // Default to expand.
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll
index 30eb2279bda85..7e42c73f86315 100644
--- a/llvm/test/CodeGen/X86/vector-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vector-pcmp.ll
@@ -1961,14 +1961,11 @@ define <4 x i64> @PR52504(<4 x i16> %t3) {
 ; SSE42-LABEL: PR52504:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; SSE42-NEXT:    pmovsxwq %xmm1, %xmm2
-; SSE42-NEXT:    pmovsxwq %xmm0, %xmm3
-; SSE42-NEXT:    pxor %xmm1, %xmm1
-; SSE42-NEXT:    pxor %xmm0, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
-; SSE42-NEXT:    por %xmm3, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
-; SSE42-NEXT:    por %xmm2, %xmm1
+; SSE42-NEXT:    pmovsxwq %xmm1, %xmm1
+; SSE42-NEXT:    pmovsxwq %xmm0, %xmm0
+; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE42-NEXT:    pmaxsw %xmm2, %xmm0
+; SSE42-NEXT:    pmaxsw %xmm2, %xmm1
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: PR52504:
@@ -1976,20 +1973,17 @@ define <4 x i64> @PR52504(<4 x i16> %t3) {
 ; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm3
-; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm2
-; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: PR52504:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: PR52504:
diff --git a/llvm/test/CodeGen/X86/vector-smax-range.ll b/llvm/test/CodeGen/X86/vector-smax-range.ll
index 56b41dad5049a..aa15fa25068af 100644
--- a/llvm/test/CodeGen/X86/vector-smax-range.ll
+++ b/llvm/test/CodeGen/X86/vector-smax-range.ll
@@ -11,11 +11,7 @@ define <4 x i32> @smax_v4i32_as_umax_v4i8(<4 x i32> %a0, <4 x i32> %a1) nounwind
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $30, %xmm0
 ; SSE2-NEXT:    psrld $29, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pmaxub %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: smax_v4i32_as_umax_v4i8:
@@ -42,11 +38,7 @@ define <4 x i32> @smax_v4i32_as_smax_v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: smax_v4i32_as_smax_v4i16:
diff --git a/llvm/test/CodeGen/X86/vector-smin-range.ll b/llvm/test/CodeGen/X86/vector-smin-range.ll
index a3c58ea5b4517..ce68a030fc185 100644
--- a/llvm/test/CodeGen/X86/vector-smin-range.ll
+++ b/llvm/test/CodeGen/X86/vector-smin-range.ll
@@ -11,11 +11,7 @@ define <4 x i32> @smin_v4i32_as_umin_v4i8(<4 x i32> %a0, <4 x i32> %a1) nounwind
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $30, %xmm0
 ; SSE2-NEXT:    psrld $29, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pminub %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: smin_v4i32_as_umin_v4i8:
@@ -42,11 +38,7 @@ define <4 x i32> @smin_v4i32_as_smin_v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: smin_v4i32_as_smin_v4i16:
diff --git a/llvm/test/CodeGen/X86/vector-umax-range.ll b/llvm/test/CodeGen/X86/vector-umax-range.ll
index c1a0ad2c29932..a0ff1de55fc3d 100644
--- a/llvm/test/CodeGen/X86/vector-umax-range.ll
+++ b/llvm/test/CodeGen/X86/vector-umax-range.ll
@@ -38,11 +38,7 @@ define <4 x i32> @umax_v4i32_as_umax_v4i8(<4 x i32> %a0, <4 x i32> %a1) nounwind
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $30, %xmm0
 ; SSE2-NEXT:    psrld $29, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pmaxub %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umax_v4i32_as_umax_v4i8:
@@ -69,11 +65,8 @@ define <4 x i32> @umax_v4i32_as_umax_v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    psrld $16, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    psubusw %xmm0, %xmm1
+; SSE2-NEXT:    paddw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umax_v4i32_as_umax_v4i16:
@@ -100,39 +93,28 @@ define <2 x i64> @umax_v2i64_as_umax_v2i16(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrlq $49, %xmm0
 ; SSE2-NEXT:    psrlq $63, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umax_v2i64_as_umax_v2i16:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $49, %xmm2
+; SSE42-NEXT:    psrlq $49, %xmm0
 ; SSE42-NEXT:    psrlq $63, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
-; SSE42-NEXT:    movapd %xmm1, %xmm0
+; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umax_v2i64_as_umax_v2i16:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsrlq $49, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umax_v2i64_as_umax_v2i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $49, %xmm0, %xmm0
 ; AVX2-NEXT:    vpsrlq $63, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umax_v2i64_as_umax_v2i16:
@@ -162,7 +144,6 @@ define <2 x i64> @umax_v2i64_as_umax_v2i32(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ; SSE2-NEXT:    psrlq $43, %xmm1
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
 ; SSE2-NEXT:    pand %xmm2, %xmm0
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm2, %xmm0
@@ -170,29 +151,23 @@ define <2 x i64> @umax_v2i64_as_umax_v2i32(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ;
 ; SSE42-LABEL: umax_v2i64_as_umax_v2i32:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $33, %xmm2
+; SSE42-NEXT:    psrlq $33, %xmm0
 ; SSE42-NEXT:    psrlq $43, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
-; SSE42-NEXT:    movapd %xmm1, %xmm0
+; SSE42-NEXT:    pmaxud %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umax_v2i64_as_umax_v2i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $43, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umax_v2i64_as_umax_v2i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $33, %xmm0, %xmm0
 ; AVX2-NEXT:    vpsrlq $43, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umax_v2i64_as_umax_v2i32:
@@ -221,35 +196,20 @@ define <4 x i64> @umax_v4i64_as_umax_v4i16(<4 x i64> %a0) nounwind {
 ; SSE2-NEXT:    psrlq $48, %xmm1
 ; SSE2-NEXT:    psrlq $48, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65530,65530]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    por %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    psubusw %xmm0, %xmm3
+; SSE2-NEXT:    paddw %xmm3, %xmm0
+; SSE2-NEXT:    psubusw %xmm1, %xmm2
+; SSE2-NEXT:    paddw %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umax_v4i64_as_umax_v4i16:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $48, %xmm2
-; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [65530,65530]
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
-; SSE42-NEXT:    movdqa %xmm3, %xmm4
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
 ; SSE42-NEXT:    psrlq $48, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movapd %xmm4, %xmm0
-; SSE42-NEXT:    movapd %xmm3, %xmm1
+; SSE42-NEXT:    psrlq $48, %xmm0
+; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [65530,65530]
+; SSE42-NEXT:    pmaxuw %xmm2, %xmm0
+; SSE42-NEXT:    pmaxuw %xmm2, %xmm1
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umax_v4i64_as_umax_v4i16:
@@ -258,10 +218,8 @@ define <4 x i64> @umax_v4i64_as_umax_v4i16(<4 x i64> %a0) nounwind {
 ; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -269,8 +227,7 @@ define <4 x i64> @umax_v4i64_as_umax_v4i16(<4 x i64> %a0) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $48, %ymm0, %ymm0
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [65530,65530,65530,65530]
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umax_v4i64_as_umax_v4i16:
diff --git a/llvm/test/CodeGen/X86/vector-umin-range.ll b/llvm/test/CodeGen/X86/vector-umin-range.ll
index 6365695f5995a..0cf816cc35449 100644
--- a/llvm/test/CodeGen/X86/vector-umin-range.ll
+++ b/llvm/test/CodeGen/X86/vector-umin-range.ll
@@ -38,11 +38,7 @@ define <4 x i32> @umin_v4i32_as_umin_v4i8(<4 x i32> %a0, <4 x i32> %a1) nounwind
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $30, %xmm0
 ; SSE2-NEXT:    psrld $29, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pminub %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umin_v4i32_as_umin_v4i8:
@@ -69,11 +65,9 @@ define <4 x i32> @umin_v4i32_as_umin_v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    psrld $16, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psubusw %xmm1, %xmm2
+; SSE2-NEXT:    psubw %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umin_v4i32_as_umin_v4i16:
@@ -100,39 +94,28 @@ define <2 x i64> @umin_v2i64_as_umin_v2i16(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    psrlq $49, %xmm0
 ; SSE2-NEXT:    psrlq $63, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umin_v2i64_as_umin_v2i16:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $49, %xmm2
+; SSE42-NEXT:    psrlq $49, %xmm0
 ; SSE42-NEXT:    psrlq $63, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
-; SSE42-NEXT:    movapd %xmm1, %xmm0
+; SSE42-NEXT:    pminuw %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umin_v2i64_as_umin_v2i16:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsrlq $49, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umin_v2i64_as_umin_v2i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $49, %xmm0, %xmm0
 ; AVX2-NEXT:    vpsrlq $63, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umin_v2i64_as_umin_v2i16:
@@ -162,7 +145,6 @@ define <2 x i64> @umin_v2i64_as_umin_v2i32(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ; SSE2-NEXT:    psrlq $43, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
 ; SSE2-NEXT:    pand %xmm2, %xmm0
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm2, %xmm0
@@ -170,29 +152,23 @@ define <2 x i64> @umin_v2i64_as_umin_v2i32(<2 x i64> %a0, <2 x i64> %a1) nounwin
 ;
 ; SSE42-LABEL: umin_v2i64_as_umin_v2i32:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $33, %xmm2
+; SSE42-NEXT:    psrlq $33, %xmm0
 ; SSE42-NEXT:    psrlq $43, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
-; SSE42-NEXT:    movapd %xmm1, %xmm0
+; SSE42-NEXT:    pminud %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umin_v2i64_as_umin_v2i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $43, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umin_v2i64_as_umin_v2i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $33, %xmm0, %xmm0
 ; AVX2-NEXT:    vpsrlq $43, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umin_v2i64_as_umin_v2i32:
@@ -221,35 +197,21 @@ define <4 x i64> @umin_v4i64_as_umin_v4i16(<4 x i64> %a0) nounwind {
 ; SSE2-NEXT:    psrlq $48, %xmm1
 ; SSE2-NEXT:    psrlq $48, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65530,65530]
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    por %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    psubusw %xmm2, %xmm3
+; SSE2-NEXT:    psubw %xmm3, %xmm0
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    psubusw %xmm2, %xmm3
+; SSE2-NEXT:    psubw %xmm3, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: umin_v4i64_as_umin_v4i16:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psrlq $48, %xmm2
-; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [65530,65530]
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE42-NEXT:    movdqa %xmm3, %xmm4
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm4
 ; SSE42-NEXT:    psrlq $48, %xmm1
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movapd %xmm4, %xmm0
-; SSE42-NEXT:    movapd %xmm3, %xmm1
+; SSE42-NEXT:    psrlq $48, %xmm0
+; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [65530,65530]
+; SSE42-NEXT:    pminuw %xmm2, %xmm0
+; SSE42-NEXT:    pminuw %xmm2, %xmm1
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: umin_v4i64_as_umin_v4i16:
@@ -258,10 +220,8 @@ define <4 x i64> @umin_v4i64_as_umin_v4i16(<4 x i64> %a0) nounwind {
 ; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -269,8 +229,7 @@ define <4 x i64> @umin_v4i64_as_umin_v4i16(<4 x i64> %a0) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsrlq $48, %ymm0, %ymm0
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [65530,65530,65530,65530]
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: umin_v4i64_as_umin_v4i16:


