[llvm] r321672 - [ValueTracking] recognize min/max of min/max patterns

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 2 12:56:45 PST 2018


Author: spatel
Date: Tue Jan  2 12:56:45 2018
New Revision: 321672

URL: http://llvm.org/viewvc/llvm-project?rev=321672&view=rev
Log:
[ValueTracking] recognize min/max of min/max patterns

This is part of solving PR35717:
https://bugs.llvm.org/show_bug.cgi?id=35717

The larger IR optimization is proposed in D41603, but we can show 
the improvement in ValueTracking using codegen tests because 
SelectionDAG creates min/max nodes based on ValueTracking. 

Any target with min/max ops should show wins here. I chose AArch64
vector ops because they're clean and uniform.

Some Alive proofs for the tests (can't put more than 2 tests in 1 
page currently because the web app says it's too long):
https://rise4fun.com/Alive/WRN
https://rise4fun.com/Alive/iPm
https://rise4fun.com/Alive/HmY
https://rise4fun.com/Alive/CNm
https://rise4fun.com/Alive/LYf


Modified:
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=321672&r1=321671&r2=321672&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Jan  2 12:56:45 2018
@@ -4161,6 +4161,81 @@ static SelectPatternResult matchClamp(Cm
   return {SPF_UNKNOWN, SPNB_NA, false};
 }
 
+/// Recognize a select between two min/max ops of the same flavor that share an
+/// operand, e.g.: a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
+static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
+                                               Value *CmpLHS, Value *CmpRHS,
+                                               Value *TrueVal, Value *FalseVal) {
+  // TODO: Allow FP min/max with nnan/nsz.
+  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
+
+  Value *A, *B;
+  SelectPatternResult L = matchSelectPattern(TrueVal, A, B);
+  if (!SelectPatternResult::isMinOrMax(L.Flavor))
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
+  Value *C, *D;
+  SelectPatternResult R = matchSelectPattern(FalseVal, C, D);
+  if (L.Flavor != R.Flavor)
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
+  // Canonicalize to a "less" compare for min, "greater" for max; else bail out.
+  switch (L.Flavor) {
+  case SPF_SMIN:
+    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+      std::swap(CmpLHS, CmpRHS);
+    }
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+      break;
+    return {SPF_UNKNOWN, SPNB_NA, false};
+  case SPF_SMAX:
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+      std::swap(CmpLHS, CmpRHS);
+    }
+    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+      break;
+    return {SPF_UNKNOWN, SPNB_NA, false};
+  case SPF_UMIN:
+    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+      std::swap(CmpLHS, CmpRHS);
+    }
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
+      break;
+    return {SPF_UNKNOWN, SPNB_NA, false};
+  case SPF_UMAX:
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+      std::swap(CmpLHS, CmpRHS);
+    }
+    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+      break;
+    return {SPF_UNKNOWN, SPNB_NA, false};
+  default:
+    llvm_unreachable("Bad flavor while matching min/max");
+  }
+
+  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+  if (CmpLHS == A && CmpRHS == C && D == B)
+    return {L.Flavor, SPNB_NA, false};
+
+  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
+  if (CmpLHS == A && CmpRHS == D && C == B)
+    return {L.Flavor, SPNB_NA, false};
+
+  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
+  if (CmpLHS == B && CmpRHS == C && D == A)
+    return {L.Flavor, SPNB_NA, false};
+
+  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
+  if (CmpLHS == B && CmpRHS == D && C == A)
+    return {L.Flavor, SPNB_NA, false};
+
+  return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
 /// Match non-obvious integer minimum and maximum sequences.
 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                        Value *CmpLHS, Value *CmpRHS,
@@ -4174,6 +4249,10 @@ static SelectPatternResult matchMinMax(C
   if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
     return SPR;
 
+  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
+  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
+    return SPR;
+  
   if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
     return {SPF_UNKNOWN, SPNB_NA, false};
 

Modified: llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll?rev=321672&r1=321671&r2=321672&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll Tue Jan  2 12:56:45 2018
@@ -9,10 +9,9 @@
 define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -26,10 +25,9 @@ define <4 x i32> @smin_ab_bc(<4 x i32> %
 define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -43,10 +41,9 @@ define <4 x i32> @smin_ab_cb(<4 x i32> %
 define <4 x i32> @smin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -60,10 +57,9 @@ define <4 x i32> @smin_bc_ab(<4 x i32> %
 define <4 x i32> @smin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -77,10 +73,9 @@ define <4 x i32> @smin_bc_ba(<4 x i32> %
 define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -94,10 +89,9 @@ define <4 x i32> @smin_ab_bc_swap_pred(<
 define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -111,10 +105,9 @@ define <4 x i32> @smin_ab_cb_swap_pred(<
 define <4 x i32> @smin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -128,10 +121,9 @@ define <4 x i32> @smin_bc_ab_swap_pred(<
 define <4 x i32> @smin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -145,10 +137,9 @@ define <4 x i32> @smin_bc_ba_swap_pred(<
 define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -162,10 +153,9 @@ define <4 x i32> @smin_ab_bc_eq_pred(<4
 define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -179,10 +169,9 @@ define <4 x i32> @smin_ab_cb_eq_pred(<4
 define <4 x i32> @smin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -196,10 +185,9 @@ define <4 x i32> @smin_bc_ab_eq_pred(<4
 define <4 x i32> @smin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -213,10 +201,9 @@ define <4 x i32> @smin_bc_ba_eq_pred(<4
 define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -230,10 +217,9 @@ define <4 x i32> @smin_ab_bc_eq_swap_pre
 define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp slt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -247,10 +233,9 @@ define <4 x i32> @smin_ab_cb_eq_swap_pre
 define <4 x i32> @smin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -264,10 +249,9 @@ define <4 x i32> @smin_bc_ab_eq_swap_pre
 define <4 x i32> @smin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp slt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -281,10 +265,9 @@ define <4 x i32> @smin_bc_ba_eq_swap_pre
 define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -298,10 +281,9 @@ define <4 x i32> @smax_ab_bc(<4 x i32> %
 define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -315,10 +297,9 @@ define <4 x i32> @smax_ab_cb(<4 x i32> %
 define <4 x i32> @smax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -332,10 +313,9 @@ define <4 x i32> @smax_bc_ab(<4 x i32> %
 define <4 x i32> @smax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -349,10 +329,9 @@ define <4 x i32> @smax_bc_ba(<4 x i32> %
 define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -366,10 +345,9 @@ define <4 x i32> @smax_ab_bc_swap_pred(<
 define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -383,10 +361,9 @@ define <4 x i32> @smax_ab_cb_swap_pred(<
 define <4 x i32> @smax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -400,10 +377,9 @@ define <4 x i32> @smax_bc_ab_swap_pred(<
 define <4 x i32> @smax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -417,10 +393,9 @@ define <4 x i32> @smax_bc_ba_swap_pred(<
 define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -434,10 +409,9 @@ define <4 x i32> @smax_ab_bc_eq_pred(<4
 define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -451,10 +425,9 @@ define <4 x i32> @smax_ab_cb_eq_pred(<4
 define <4 x i32> @smax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -468,10 +441,9 @@ define <4 x i32> @smax_bc_ab_eq_pred(<4
 define <4 x i32> @smax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -485,10 +457,9 @@ define <4 x i32> @smax_bc_ba_eq_pred(<4
 define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -502,10 +473,9 @@ define <4 x i32> @smax_ab_bc_eq_swap_pre
 define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp sgt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -519,10 +489,9 @@ define <4 x i32> @smax_ab_cb_eq_swap_pre
 define <4 x i32> @smax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -536,10 +505,9 @@ define <4 x i32> @smax_bc_ab_eq_swap_pre
 define <4 x i32> @smax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp sgt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -553,10 +521,9 @@ define <4 x i32> @smax_bc_ba_eq_swap_pre
 define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -570,10 +537,9 @@ define <4 x i32> @umin_ab_bc(<4 x i32> %
 define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -587,10 +553,9 @@ define <4 x i32> @umin_ab_cb(<4 x i32> %
 define <4 x i32> @umin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -604,10 +569,9 @@ define <4 x i32> @umin_bc_ab(<4 x i32> %
 define <4 x i32> @umin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -621,10 +585,9 @@ define <4 x i32> @umin_bc_ba(<4 x i32> %
 define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -638,10 +601,9 @@ define <4 x i32> @umin_ab_bc_swap_pred(<
 define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -655,10 +617,9 @@ define <4 x i32> @umin_ab_cb_swap_pred(<
 define <4 x i32> @umin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -672,10 +633,9 @@ define <4 x i32> @umin_bc_ab_swap_pred(<
 define <4 x i32> @umin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -689,10 +649,9 @@ define <4 x i32> @umin_bc_ba_swap_pred(<
 define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -706,10 +665,9 @@ define <4 x i32> @umin_ab_bc_eq_pred(<4
 define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -723,10 +681,9 @@ define <4 x i32> @umin_ab_cb_eq_pred(<4
 define <4 x i32> @umin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -740,10 +697,9 @@ define <4 x i32> @umin_bc_ab_eq_pred(<4
 define <4 x i32> @umin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -757,10 +713,9 @@ define <4 x i32> @umin_bc_ba_eq_pred(<4
 define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -774,10 +729,9 @@ define <4 x i32> @umin_ab_bc_eq_swap_pre
 define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ult <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -791,10 +745,9 @@ define <4 x i32> @umin_ab_cb_eq_swap_pre
 define <4 x i32> @umin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -808,10 +761,9 @@ define <4 x i32> @umin_bc_ab_eq_swap_pre
 define <4 x i32> @umin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umin v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ult <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -825,10 +777,9 @@ define <4 x i32> @umin_bc_ba_eq_swap_pre
 define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -842,10 +793,9 @@ define <4 x i32> @umax_ab_bc(<4 x i32> %
 define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -859,10 +809,9 @@ define <4 x i32> @umax_ab_cb(<4 x i32> %
 define <4 x i32> @umax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -876,10 +825,9 @@ define <4 x i32> @umax_bc_ab(<4 x i32> %
 define <4 x i32> @umax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -893,10 +841,9 @@ define <4 x i32> @umax_bc_ba(<4 x i32> %
 define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -910,10 +857,9 @@ define <4 x i32> @umax_ab_bc_swap_pred(<
 define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -927,10 +873,9 @@ define <4 x i32> @umax_ab_cb_swap_pred(<
 define <4 x i32> @umax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -944,10 +889,9 @@ define <4 x i32> @umax_bc_ab_swap_pred(<
 define <4 x i32> @umax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -961,10 +905,9 @@ define <4 x i32> @umax_bc_ba_swap_pred(<
 define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -978,10 +921,9 @@ define <4 x i32> @umax_ab_bc_eq_pred(<4
 define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -995,10 +937,9 @@ define <4 x i32> @umax_ab_cb_eq_pred(<4
 define <4 x i32> @umax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -1012,10 +953,9 @@ define <4 x i32> @umax_bc_ab_eq_pred(<4
 define <4 x i32> @umax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -1029,10 +969,9 @@ define <4 x i32> @umax_bc_ba_eq_pred(<4
 define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -1046,10 +985,9 @@ define <4 x i32> @umax_ab_bc_eq_swap_pre
 define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %cmp_ab = icmp ugt <4 x i32> %a, %b
   %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -1063,10 +1001,9 @@ define <4 x i32> @umax_ab_cb_eq_swap_pre
 define <4 x i32> @umax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
@@ -1080,10 +1017,9 @@ define <4 x i32> @umax_bc_ab_eq_swap_pre
 define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umax v3.4s, v1.4s, v2.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %cmp_bc = icmp ugt <4 x i32> %b, %c
   %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c




More information about the llvm-commits mailing list