[llvm] r322283 - [ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 11 07:13:47 PST 2018


Author: spatel
Date: Thu Jan 11 07:13:47 2018
New Revision: 322283

URL: http://llvm.org/viewvc/llvm-project?rev=322283&view=rev
Log:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)

This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out that we need to handle the case with
'not' ops too, because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.

As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.

We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603

Here is an Alive proof (two transforms, listed below) for one variant of
the pattern, showing that the InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1

Name: min3_nots
  %nx = xor i8 %x, -1
  %ny = xor i8 %y, -1
  %nz = xor i8 %z, -1
  %cmpxz = icmp slt i8 %nx, %nz
  %minxz = select i1 %cmpxz, i8 %nx, i8 %nz
  %cmpyz = icmp slt i8 %ny, %nz
  %minyz = select i1 %cmpyz, i8 %ny, i8 %nz
  %cmpyx = icmp slt i8 %y, %x
  %r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
  %cmpxyz = icmp slt i8 %minxz, %ny
  %r = select i1 %cmpxyz, i8 %minxz, i8 %ny

Name: min3_nots_alt
  %nx = xor i8 %x, -1
  %ny = xor i8 %y, -1
  %nz = xor i8 %z, -1
  %cmpxz = icmp slt i8 %nx, %nz
  %minxz = select i1 %cmpxz, i8 %nx, i8 %nz
  %cmpyz = icmp slt i8 %ny, %nz
  %minyz = select i1 %cmpyz, i8 %ny, i8 %nz
  %cmpyx = icmp slt i8 %y, %x
  %r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
  %xz = icmp sgt i8 %x, %z
  %maxxz = select i1 %xz, i8 %x, i8 %z
  %xyz = icmp sgt i8 %maxxz, %y
  %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
  %r = xor i8 %maxxyz, -1



Modified:
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll
    llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=322283&r1=322282&r2=322283&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Thu Jan 11 07:13:47 2018
@@ -4179,7 +4179,9 @@ static SelectPatternResult matchMinMaxOf
   if (L.Flavor != R.Flavor)
     return {SPF_UNKNOWN, SPNB_NA, false};
 
-  // Match the compare to the min/max operations of the select operands.
+  // We have something like: x Pred y ? min(a, b) : min(c, d).
+  // Try to match the compare to the min/max operations of the select operands.
+  // First, make sure we have the right compare predicate.
   switch (L.Flavor) {
   case SPF_SMIN:
     if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
@@ -4217,21 +4219,38 @@ static SelectPatternResult matchMinMaxOf
     return {SPF_UNKNOWN, SPNB_NA, false};
   }
 
-  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
-  if (CmpLHS == A && CmpRHS == C && D == B)
-    return {L.Flavor, SPNB_NA, false};
+  // If there is a common operand in the already matched min/max and the other
+  // min/max operands match the compare operands (either directly or inverted),
+  // then this is min/max of the same flavor.
 
+  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+  if (D == B) {
+    if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
+                                         match(A, m_Not(m_Specific(CmpRHS)))))
+      return {L.Flavor, SPNB_NA, false};
+  }
   // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
-  if (CmpLHS == A && CmpRHS == D && C == B)
-    return {L.Flavor, SPNB_NA, false};
-
+  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
+  if (C == B) {
+    if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
+                                         match(A, m_Not(m_Specific(CmpRHS)))))
+      return {L.Flavor, SPNB_NA, false};
+  }
   // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
-  if (CmpLHS == B && CmpRHS == C && D == A)
-    return {L.Flavor, SPNB_NA, false};
-
+  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
+  if (D == A) {
+    if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
+                                         match(B, m_Not(m_Specific(CmpRHS)))))
+      return {L.Flavor, SPNB_NA, false};
+  }
   // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
-  if (CmpLHS == B && CmpRHS == D && C == A)
-    return {L.Flavor, SPNB_NA, false};
+  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
+  if (C == A) {
+    if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
+                                         match(B, m_Not(m_Specific(CmpRHS)))))
+      return {L.Flavor, SPNB_NA, false};
+  }
 
   return {SPF_UNKNOWN, SPNB_NA, false};
 }

Modified: llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll?rev=322283&r1=322282&r2=322283&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/minmax-of-minmax.ll Thu Jan 11 07:13:47 2018
@@ -4,7 +4,7 @@
 ; There are 4 commuted variants (abbc/abcb/bcab/bcba) *
 ;           4 predicate variants ([*][lg][te]) *
 ;           4 min/max flavors (smin/smax/umin/umax) *
-;           2 notted variants  
+;           2 notted variants
 ;           = 128 tests
 
 define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
@@ -1034,13 +1034,12 @@ define <4 x i32> @umax_bc_ba_eq_swap_pre
 define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1057,13 +1056,12 @@ define <4 x i32> @notted_smin_ab_bc(<4 x
 define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1080,13 +1078,12 @@ define <4 x i32> @notted_smin_ab_cb(<4 x
 define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1103,13 +1100,12 @@ define <4 x i32> @notted_smin_bc_ab(<4 x
 define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1126,13 +1122,12 @@ define <4 x i32> @notted_smin_bc_ba(<4 x
 define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1149,13 +1144,12 @@ define <4 x i32> @notted_smin_ab_bc_swap
 define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1172,13 +1166,12 @@ define <4 x i32> @notted_smin_ab_cb_swap
 define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1195,13 +1188,12 @@ define <4 x i32> @notted_smin_bc_ab_swap
 define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1218,13 +1210,12 @@ define <4 x i32> @notted_smin_bc_ba_swap
 define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1241,13 +1232,12 @@ define <4 x i32> @notted_smin_ab_bc_eq_p
 define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1264,13 +1254,12 @@ define <4 x i32> @notted_smin_ab_cb_eq_p
 define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1287,13 +1276,12 @@ define <4 x i32> @notted_smin_bc_ab_eq_p
 define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1310,13 +1298,12 @@ define <4 x i32> @notted_smin_bc_ba_eq_p
 define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1333,13 +1320,12 @@ define <4 x i32> @notted_smin_ab_bc_eq_s
 define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1356,13 +1342,12 @@ define <4 x i32> @notted_smin_ab_cb_eq_s
 define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1379,13 +1364,12 @@ define <4 x i32> @notted_smin_bc_ab_eq_s
 define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1402,13 +1386,12 @@ define <4 x i32> @notted_smin_bc_ba_eq_s
 define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1425,13 +1408,12 @@ define <4 x i32> @notted_smax_ab_bc(<4 x
 define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1448,13 +1430,12 @@ define <4 x i32> @notted_smax_ab_cb(<4 x
 define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1471,13 +1452,12 @@ define <4 x i32> @notted_smax_bc_ab(<4 x
 define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1494,13 +1474,12 @@ define <4 x i32> @notted_smax_bc_ba(<4 x
 define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1517,13 +1496,12 @@ define <4 x i32> @notted_smax_ab_bc_swap
 define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1540,13 +1518,12 @@ define <4 x i32> @notted_smax_ab_cb_swap
 define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1563,13 +1540,12 @@ define <4 x i32> @notted_smax_bc_ab_swap
 define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1586,13 +1562,12 @@ define <4 x i32> @notted_smax_bc_ba_swap
 define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1609,13 +1584,12 @@ define <4 x i32> @notted_smax_ab_bc_eq_p
 define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1632,13 +1606,12 @@ define <4 x i32> @notted_smax_ab_cb_eq_p
 define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1655,13 +1628,12 @@ define <4 x i32> @notted_smax_bc_ab_eq_p
 define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1678,13 +1650,12 @@ define <4 x i32> @notted_smax_bc_ba_eq_p
 define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1701,13 +1672,12 @@ define <4 x i32> @notted_smax_ab_bc_eq_s
 define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    smax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1724,13 +1694,12 @@ define <4 x i32> @notted_smax_ab_cb_eq_s
 define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1747,13 +1716,12 @@ define <4 x i32> @notted_smax_bc_ab_eq_s
 define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    smax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmge v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    smax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    smax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    smax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1770,13 +1738,12 @@ define <4 x i32> @notted_smax_bc_ba_eq_s
 define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1793,13 +1760,12 @@ define <4 x i32> @notted_umin_ab_bc(<4 x
 define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1816,13 +1782,12 @@ define <4 x i32> @notted_umin_ab_cb(<4 x
 define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1839,13 +1804,12 @@ define <4 x i32> @notted_umin_bc_ab(<4 x
 define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1862,13 +1826,12 @@ define <4 x i32> @notted_umin_bc_ba(<4 x
 define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1885,13 +1848,12 @@ define <4 x i32> @notted_umin_ab_bc_swap
 define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1908,13 +1870,12 @@ define <4 x i32> @notted_umin_ab_cb_swap
 define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1931,13 +1892,12 @@ define <4 x i32> @notted_umin_bc_ab_swap
 define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1954,13 +1914,12 @@ define <4 x i32> @notted_umin_bc_ba_swap
 define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1977,13 +1936,12 @@ define <4 x i32> @notted_umin_ab_bc_eq_p
 define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2000,13 +1958,12 @@ define <4 x i32> @notted_umin_ab_cb_eq_p
 define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2023,13 +1980,12 @@ define <4 x i32> @notted_umin_bc_ab_eq_p
 define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2046,13 +2002,12 @@ define <4 x i32> @notted_umin_bc_ba_eq_p
 define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2069,13 +2024,12 @@ define <4 x i32> @notted_umin_ab_bc_eq_s
 define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umin v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2092,13 +2046,12 @@ define <4 x i32> @notted_umin_ab_cb_eq_s
 define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2115,13 +2068,12 @@ define <4 x i32> @notted_umin_bc_ab_eq_s
 define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umin v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umin v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umin v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umin v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umin v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2138,13 +2090,12 @@ define <4 x i32> @notted_umin_bc_ba_eq_s
 define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2161,13 +2112,12 @@ define <4 x i32> @notted_umax_ab_bc(<4 x
 define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2184,13 +2134,12 @@ define <4 x i32> @notted_umax_ab_cb(<4 x
 define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2207,13 +2156,12 @@ define <4 x i32> @notted_umax_bc_ab(<4 x
 define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2230,13 +2178,12 @@ define <4 x i32> @notted_umax_bc_ba(<4 x
 define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2253,13 +2200,12 @@ define <4 x i32> @notted_umax_ab_bc_swap
 define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2276,13 +2222,12 @@ define <4 x i32> @notted_umax_ab_cb_swap
 define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2299,13 +2244,12 @@ define <4 x i32> @notted_umax_bc_ab_swap
 define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2322,13 +2266,12 @@ define <4 x i32> @notted_umax_bc_ba_swap
 define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2345,13 +2288,12 @@ define <4 x i32> @notted_umax_ab_bc_eq_p
 define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2368,13 +2310,12 @@ define <4 x i32> @notted_umax_ab_cb_eq_p
 define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2391,13 +2332,12 @@ define <4 x i32> @notted_umax_bc_ab_eq_p
 define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_eq_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2414,13 +2354,12 @@ define <4 x i32> @notted_umax_bc_ba_eq_p
 define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v4.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2437,13 +2376,12 @@ define <4 x i32> @notted_umax_ab_bc_eq_s
 define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v3.4s, v3.4s, v1.4s
-; CHECK-NEXT:    umax v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    bsl v0.16b, v3.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2460,13 +2398,12 @@ define <4 x i32> @notted_umax_ab_cb_eq_s
 define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2483,13 +2420,12 @@ define <4 x i32> @notted_umax_bc_ab_eq_s
 define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v3.16b, v0.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    mvn v4.16b, v2.16b
-; CHECK-NEXT:    umax v4.4s, v1.4s, v4.4s
-; CHECK-NEXT:    umax v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    cmhs v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v1.16b
+; CHECK-NEXT:    mvn v2.16b, v2.16b
+; CHECK-NEXT:    umax v2.4s, v1.4s, v2.4s
+; CHECK-NEXT:    umax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    umax v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>

Modified: llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll?rev=322283&r1=322282&r2=322283&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll Thu Jan 11 07:13:47 2018
@@ -109,13 +109,11 @@ define i8 @umin3_not_more_uses(i8 %x, i8
 ; CHECK-LABEL: @umin3_not_more_uses(
 ; CHECK-NEXT:    [[NX:%.*]] = xor i8 %x, -1
 ; CHECK-NEXT:    [[NY:%.*]] = xor i8 %y, -1
-; CHECK-NEXT:    [[NZ:%.*]] = xor i8 %z, -1
-; CHECK-NEXT:    [[CMPXZ:%.*]] = icmp ult i8 [[NX]], [[NZ]]
-; CHECK-NEXT:    [[MINXZ:%.*]] = select i1 [[CMPXZ]], i8 [[NX]], i8 [[NZ]]
-; CHECK-NEXT:    [[CMPYZ:%.*]] = icmp ult i8 [[NY]], [[NZ]]
-; CHECK-NEXT:    [[MINYZ:%.*]] = select i1 [[CMPYZ]], i8 [[NY]], i8 [[NZ]]
-; CHECK-NEXT:    [[CMPYX:%.*]] = icmp ult i8 %y, %x
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMPYX]], i8 [[MINXZ]], i8 [[MINYZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 %x, %z
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i8 %x, i8 %z
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], %y
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 %y
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[TMP4]], -1
 ; CHECK-NEXT:    call void @extra_use(i8 [[NX]])
 ; CHECK-NEXT:    call void @extra_use(i8 [[NY]])
 ; CHECK-NEXT:    ret i8 [[R]]




More information about the llvm-commits mailing list