[llvm] [DAGCombiner] Add pattern matching for negated subtraction in ABDU selection (PR #156217)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 05:49:57 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/156217

From 679ddf32452d13dc3ea6b1022237c2270ca04115 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 30 Aug 2025 21:28:20 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)

---
 llvm/test/CodeGen/X86/abdu.ll | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 043c9155f52f9..b9e01fda29615 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -953,6 +953,33 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
   ret i128 %sub
 }
 
+define i32 @abdu_select(i32 %x, i32 %y) {
+; X86-LABEL: abdu_select:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abdu_select:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmoval %edi, %eax
+; X64-NEXT:    retq
+  %sub = sub i32 %x, %y
+  %cmp = icmp ugt i32 %x, %y
+  %sub1 = sub i32 0, %sub
+  %cond = select i1 %cmp, i32 %sub, i32 %sub1
+  ret i32 %cond
+}
+
 declare i8 @llvm.abs.i8(i8, i1)
 declare i16 @llvm.abs.i16(i16, i1)
 declare i32 @llvm.abs.i32(i32, i1)

From c61539c5f1c38e56395e99aaf4b5372bd26ceac1 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 30 Aug 2025 21:37:39 -0400
Subject: [PATCH 2/2] [DAGCombiner] Add pattern matching for negated
 subtraction in ABDU selection

select(ugt(x, y), sub(x, y), sub(0, sub(x, y))) -> abdu(x, y)

This is because negating the difference, sub(0, sub(x, y)) = -(x - y), is the
same as y - x, so both arms of the select together compute the unsigned
absolute difference.
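
For illustration, a minimal IR reproducer of the shape this matches (the same
pattern as the abdu_select test added in the first patch); with this change
the select should be recognized as an ABDU node during DAG combining:

  define i32 @abdu_select(i32 %x, i32 %y) {
    %sub = sub i32 %x, %y
    %cmp = icmp ugt i32 %x, %y
    %sub1 = sub i32 0, %sub          ; negated difference, i.e. y - x
    %cond = select i1 %cmp, i32 %sub, i32 %sub1
    ret i32 %cond
  }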
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  50 +-
 llvm/test/CodeGen/AArch64/abds-neg.ll         |  30 +-
 llvm/test/CodeGen/AArch64/abdu-neg.ll         |  30 +-
 llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll   |   7 +-
 llvm/test/CodeGen/RISCV/abds-neg.ll           | 447 ++++++++-------
 llvm/test/CodeGen/RISCV/abdu-neg.ll           | 539 ++++++++++--------
 llvm/test/CodeGen/X86/abds-neg.ll             |  97 ++--
 llvm/test/CodeGen/X86/abdu.ll                 |  12 +-
 8 files changed, 679 insertions(+), 533 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..8ede62dd4f3d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12284,27 +12284,45 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
   case ISD::SETGT:
   case ISD::SETGE:
   case ISD::SETUGT:
-  case ISD::SETUGE:
-    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
-        sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
-      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
-    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
-        sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
-        hasOperation(ABDOpc, VT))
-      return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+  case ISD::SETUGE: {
+    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS)))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+    }
+
+    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
+        return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS)))))
+        return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+    }
     break;
+  }
   case ISD::SETLT:
   case ISD::SETLE:
   case ISD::SETULT:
-  case ISD::SETULE:
-    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
-        sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
-      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
-    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
-        sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
-        hasOperation(ABDOpc, VT))
-      return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+  case ISD::SETULE: {
+    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS)))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+    }
+
+    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
+        return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS)))))
+        return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+    }
     break;
+  }
   default:
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 37319642f5b34..c3624d2ba69af 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w9, w0, w1
-; CHECK-NEXT:    sub w10, w1, w0
-; CHECK-NEXT:    cmp w8, w1, sxtb
-; CHECK-NEXT:    csel w0, w9, w10, le
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
+; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %cmp = icmp sle i8 %a, %b
   %ab = sub i8 %a, %b
@@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w9, w0, w1
-; CHECK-NEXT:    sub w10, w1, w0
-; CHECK-NEXT:    cmp w8, w1, sxth
-; CHECK-NEXT:    csel w0, w9, w10, lt
+; CHECK-NEXT:    subs w8, w8, w1, sxth
+; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i16 %a, %b
   %ab = sub i16 %a, %b
@@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp sge i32 %a, %b
   %ab = sub i32 %a, %b
@@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, ge
+; CHECK-NEXT:    cneg x0, x8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i64 %a, %b
   %ab = sub i64 %a, %b
@@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x2, x0
-; CHECK-NEXT:    sbc x9, x3, x1
-; CHECK-NEXT:    subs x10, x0, x2
-; CHECK-NEXT:    sbcs x11, x1, x3
-; CHECK-NEXT:    csel x0, x10, x8, lt
-; CHECK-NEXT:    csel x1, x11, x9, lt
+; CHECK-NEXT:    subs x8, x0, x2
+; CHECK-NEXT:    sbc x9, x1, x3
+; CHECK-NEXT:    subs x10, x2, x0
+; CHECK-NEXT:    sbcs x11, x3, x1
+; CHECK-NEXT:    csel x8, x8, x10, lt
+; CHECK-NEXT:    csel x9, x9, x11, lt
+; CHECK-NEXT:    negs x0, x8
+; CHECK-NEXT:    ngc x1, x9
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i128 %a, %b
   %ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..e541f39d40ed5 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w9, w0, w1
-; CHECK-NEXT:    sub w10, w1, w0
-; CHECK-NEXT:    cmp w8, w1, uxtb
-; CHECK-NEXT:    csel w0, w9, w10, ls
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
+; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %cmp = icmp ule i8 %a, %b
   %ab = sub i8 %a, %b
@@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w9, w0, w1
-; CHECK-NEXT:    sub w10, w1, w0
-; CHECK-NEXT:    cmp w8, w1, uxth
-; CHECK-NEXT:    csel w0, w9, w10, lo
+; CHECK-NEXT:    subs w8, w8, w1, uxth
+; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i16 %a, %b
   %ab = sub i16 %a, %b
@@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp uge i32 %a, %b
   %ab = sub i32 %a, %b
@@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i64 %a, %b
   %ab = sub i64 %a, %b
@@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x2, x0
-; CHECK-NEXT:    sbc x9, x3, x1
-; CHECK-NEXT:    subs x10, x0, x2
-; CHECK-NEXT:    sbcs x11, x1, x3
-; CHECK-NEXT:    csel x0, x10, x8, lo
-; CHECK-NEXT:    csel x1, x11, x9, lo
+; CHECK-NEXT:    subs x8, x0, x2
+; CHECK-NEXT:    sbc x9, x1, x3
+; CHECK-NEXT:    subs x10, x2, x0
+; CHECK-NEXT:    sbcs x11, x3, x1
+; CHECK-NEXT:    csel x8, x8, x10, lo
+; CHECK-NEXT:    csel x9, x9, x11, lo
+; CHECK-NEXT:    negs x0, x8
+; CHECK-NEXT:    ngc x1, x9
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i128 %a, %b
   %ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index eaab932c41df7..7489ceacb7a17 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -966,10 +966,9 @@ define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
 ;
 ; CHECK-PWR78-LABEL: absd_int32_ugt_opp:
 ; CHECK-PWR78:       # %bb.0:
-; CHECK-PWR78-NEXT:    vcmpgtuw v4, v2, v3
-; CHECK-PWR78-NEXT:    vsubuwm v5, v2, v3
-; CHECK-PWR78-NEXT:    vsubuwm v2, v3, v2
-; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
+; CHECK-PWR78-NEXT:    vmaxuw v4, v2, v3
+; CHECK-PWR78-NEXT:    vminuw v2, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v2, v2, v4
 ; CHECK-PWR78-NEXT:    blr
   %3 = icmp ugt <4 x i32> %0, %1
   %4 = sub <4 x i32> %0, %1
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index 41f73f51fe7b6..e4ab26a6e6cac 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -1646,42 +1646,35 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i8:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a2, a0, 24
-; RV32I-NEXT:    slli a3, a1, 24
-; RV32I-NEXT:    srai a2, a2, 24
-; RV32I-NEXT:    srai a3, a3, 24
-; RV32I-NEXT:    bge a3, a2, .LBB18_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a1, a0
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB18_2:
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srai a1, a1, 24
+; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a2, a0, 56
-; RV64I-NEXT:    slli a3, a1, 56
-; RV64I-NEXT:    srai a2, a2, 56
-; RV64I-NEXT:    srai a3, a3, 56
-; RV64I-NEXT:    bge a3, a2, .LBB18_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    sub a0, a1, a0
-; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB18_2:
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a1, a1, 56
+; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; ZBB-LABEL: abd_cmp_i8:
 ; ZBB:       # %bb.0:
-; ZBB-NEXT:    sext.b a2, a0
-; ZBB-NEXT:    sext.b a3, a1
-; ZBB-NEXT:    bge a3, a2, .LBB18_2
-; ZBB-NEXT:  # %bb.1:
-; ZBB-NEXT:    sub a0, a1, a0
-; ZBB-NEXT:    ret
-; ZBB-NEXT:  .LBB18_2:
-; ZBB-NEXT:    sub a0, a0, a1
+; ZBB-NEXT:    sext.b a1, a1
+; ZBB-NEXT:    sext.b a0, a0
+; ZBB-NEXT:    max a2, a0, a1
+; ZBB-NEXT:    min a0, a0, a1
+; ZBB-NEXT:    sub a0, a0, a2
 ; ZBB-NEXT:    ret
   %cmp = icmp sle i8 %a, %b
   %ab = sub i8 %a, %b
@@ -1693,42 +1686,35 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a2, a1, 16
-; RV32I-NEXT:    slli a3, a0, 16
-; RV32I-NEXT:    srai a2, a2, 16
-; RV32I-NEXT:    srai a3, a3, 16
-; RV32I-NEXT:    blt a3, a2, .LBB19_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a1, a0
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB19_2:
+; RV32I-NEXT:    slli a1, a1, 16
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a2, a1, 48
-; RV64I-NEXT:    slli a3, a0, 48
-; RV64I-NEXT:    srai a2, a2, 48
-; RV64I-NEXT:    srai a3, a3, 48
-; RV64I-NEXT:    blt a3, a2, .LBB19_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    sub a0, a1, a0
-; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    slli a1, a1, 48
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a1, a1, 48
+; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; ZBB-LABEL: abd_cmp_i16:
 ; ZBB:       # %bb.0:
-; ZBB-NEXT:    sext.h a2, a1
-; ZBB-NEXT:    sext.h a3, a0
-; ZBB-NEXT:    blt a3, a2, .LBB19_2
-; ZBB-NEXT:  # %bb.1:
-; ZBB-NEXT:    sub a0, a1, a0
-; ZBB-NEXT:    ret
-; ZBB-NEXT:  .LBB19_2:
-; ZBB-NEXT:    sub a0, a0, a1
+; ZBB-NEXT:    sext.h a1, a1
+; ZBB-NEXT:    sext.h a0, a0
+; ZBB-NEXT:    max a2, a0, a1
+; ZBB-NEXT:    min a0, a0, a1
+; ZBB-NEXT:    sub a0, a0, a2
 ; ZBB-NEXT:    ret
   %cmp = icmp slt i16 %a, %b
   %ab = sub i16 %a, %b
@@ -1740,46 +1726,40 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    bge a0, a1, .LBB20_2
+; RV32I-NEXT:    blt a1, a0, .LBB20_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB20_2:
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a2, a1
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    bge a3, a2, .LBB20_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    subw a0, a0, a1
-; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB20_2:
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i32:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    bge a0, a1, .LBB20_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sub a0, a0, a1
-; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB20_2:
-; RV32ZBB-NEXT:    sub a0, a1, a0
+; RV32ZBB-NEXT:    max a2, a0, a1
+; RV32ZBB-NEXT:    min a0, a0, a1
+; RV32ZBB-NEXT:    sub a0, a0, a2
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sext.w a2, a1
-; RV64ZBB-NEXT:    sext.w a3, a0
-; RV64ZBB-NEXT:    bge a3, a2, .LBB20_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    subw a0, a0, a1
-; RV64ZBB-NEXT:    ret
-; RV64ZBB-NEXT:  .LBB20_2:
-; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    sext.w a1, a1
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    max a2, a0, a1
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a2
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp sge i32 %a, %b
   %ab = sub i32 %a, %b
@@ -1791,64 +1771,73 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
 define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    sltu a4, a0, a2
+; RV32I-NEXT:    sltu a4, a2, a0
 ; RV32I-NEXT:    mv a5, a4
 ; RV32I-NEXT:    beq a1, a3, .LBB21_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    slt a5, a1, a3
+; RV32I-NEXT:    slt a5, a3, a1
 ; RV32I-NEXT:  .LBB21_2:
 ; RV32I-NEXT:    bnez a5, .LBB21_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    sltu a4, a2, a0
 ; RV32I-NEXT:    sub a1, a3, a1
 ; RV32I-NEXT:    sub a1, a1, a4
 ; RV32I-NEXT:    sub a0, a2, a0
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    j .LBB21_5
 ; RV32I-NEXT:  .LBB21_4:
+; RV32I-NEXT:    sltu a4, a0, a2
 ; RV32I-NEXT:    sub a1, a1, a3
 ; RV32I-NEXT:    sub a1, a1, a4
 ; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:  .LBB21_5:
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    blt a0, a1, .LBB21_2
+; RV64I-NEXT:    blt a1, a0, .LBB21_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ; RV64I-NEXT:  .LBB21_2:
 ; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    sltu a4, a0, a2
+; RV32ZBB-NEXT:    sltu a4, a2, a0
 ; RV32ZBB-NEXT:    mv a5, a4
 ; RV32ZBB-NEXT:    beq a1, a3, .LBB21_2
 ; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    slt a5, a1, a3
+; RV32ZBB-NEXT:    slt a5, a3, a1
 ; RV32ZBB-NEXT:  .LBB21_2:
 ; RV32ZBB-NEXT:    bnez a5, .LBB21_4
 ; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    sltu a4, a2, a0
 ; RV32ZBB-NEXT:    sub a1, a3, a1
 ; RV32ZBB-NEXT:    sub a1, a1, a4
 ; RV32ZBB-NEXT:    sub a0, a2, a0
-; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:    j .LBB21_5
 ; RV32ZBB-NEXT:  .LBB21_4:
+; RV32ZBB-NEXT:    sltu a4, a0, a2
 ; RV32ZBB-NEXT:    sub a1, a1, a3
 ; RV32ZBB-NEXT:    sub a1, a1, a4
 ; RV32ZBB-NEXT:    sub a0, a0, a2
+; RV32ZBB-NEXT:  .LBB21_5:
+; RV32ZBB-NEXT:    snez a2, a0
+; RV32ZBB-NEXT:    add a1, a1, a2
+; RV32ZBB-NEXT:    neg a1, a1
+; RV32ZBB-NEXT:    neg a0, a0
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i64:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    blt a0, a1, .LBB21_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    sub a0, a1, a0
-; RV64ZBB-NEXT:    ret
-; RV64ZBB-NEXT:  .LBB21_2:
-; RV64ZBB-NEXT:    sub a0, a0, a1
+; RV64ZBB-NEXT:    max a2, a0, a1
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    sub a0, a0, a2
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp slt i64 %a, %b
   %ab = sub i64 %a, %b
@@ -1860,176 +1849,240 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i128:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a3, 0(a2)
-; RV32I-NEXT:    lw a4, 4(a2)
-; RV32I-NEXT:    lw a5, 8(a2)
-; RV32I-NEXT:    lw a7, 12(a2)
-; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
 ; RV32I-NEXT:    lw a6, 8(a1)
 ; RV32I-NEXT:    lw t1, 12(a1)
-; RV32I-NEXT:    lw a1, 4(a1)
-; RV32I-NEXT:    sltu t0, a6, a5
-; RV32I-NEXT:    mv t4, t0
-; RV32I-NEXT:    beq t1, a7, .LBB22_2
+; RV32I-NEXT:    lw a1, 0(a2)
+; RV32I-NEXT:    lw t0, 8(a2)
+; RV32I-NEXT:    lw t2, 12(a2)
+; RV32I-NEXT:    lw a2, 4(a2)
+; RV32I-NEXT:    sltu t3, t0, a6
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    beq t1, t2, .LBB22_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    slt t4, t1, a7
+; RV32I-NEXT:    slt t4, t2, t1
 ; RV32I-NEXT:  .LBB22_2:
-; RV32I-NEXT:    sltu t2, a2, a3
-; RV32I-NEXT:    mv t3, t2
-; RV32I-NEXT:    beq a1, a4, .LBB22_4
+; RV32I-NEXT:    sltu a5, a1, a3
+; RV32I-NEXT:    sltu t6, a2, a4
+; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    beq a4, a2, .LBB22_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    sltu t3, a1, a4
+; RV32I-NEXT:    mv a7, t6
 ; RV32I-NEXT:  .LBB22_4:
-; RV32I-NEXT:    xor t5, t1, a7
-; RV32I-NEXT:    xor t6, a6, a5
-; RV32I-NEXT:    or t5, t6, t5
-; RV32I-NEXT:    mv t6, t3
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    xor t5, t1, t2
+; RV32I-NEXT:    xor s0, a6, t0
+; RV32I-NEXT:    or t5, s0, t5
 ; RV32I-NEXT:    beqz t5, .LBB22_6
 ; RV32I-NEXT:  # %bb.5:
-; RV32I-NEXT:    mv t6, t4
+; RV32I-NEXT:    mv a7, t4
 ; RV32I-NEXT:  .LBB22_6:
-; RV32I-NEXT:    sltu t4, a3, a2
-; RV32I-NEXT:    mv t5, t4
-; RV32I-NEXT:    beq a1, a4, .LBB22_8
+; RV32I-NEXT:    mv t5, a5
+; RV32I-NEXT:    beq a2, a4, .LBB22_8
 ; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    sltu t5, a4, a1
+; RV32I-NEXT:    mv t5, t6
 ; RV32I-NEXT:  .LBB22_8:
-; RV32I-NEXT:    bnez t6, .LBB22_10
+; RV32I-NEXT:    sltu t4, a3, a1
+; RV32I-NEXT:    mv t6, t4
+; RV32I-NEXT:    beq a4, a2, .LBB22_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    sltu t0, a5, a6
-; RV32I-NEXT:    sub a7, a7, t1
-; RV32I-NEXT:    sub a5, a5, a6
-; RV32I-NEXT:    sub a4, a4, a1
-; RV32I-NEXT:    sub a6, a7, t0
-; RV32I-NEXT:    sltu a7, a5, t5
-; RV32I-NEXT:    sub a1, a5, t5
-; RV32I-NEXT:    sub a5, a4, t4
-; RV32I-NEXT:    sub a4, a6, a7
-; RV32I-NEXT:    sub a2, a3, a2
-; RV32I-NEXT:    j .LBB22_11
+; RV32I-NEXT:    sltu t6, a4, a2
 ; RV32I-NEXT:  .LBB22_10:
-; RV32I-NEXT:    sub a7, t1, a7
-; RV32I-NEXT:    sub a5, a6, a5
-; RV32I-NEXT:    sub a4, a1, a4
-; RV32I-NEXT:    sub a6, a7, t0
-; RV32I-NEXT:    sltu a7, a5, t3
-; RV32I-NEXT:    sub a1, a5, t3
-; RV32I-NEXT:    sub a5, a4, t2
-; RV32I-NEXT:    sub a4, a6, a7
-; RV32I-NEXT:    sub a2, a2, a3
-; RV32I-NEXT:  .LBB22_11:
-; RV32I-NEXT:    sw a2, 0(a0)
-; RV32I-NEXT:    sw a5, 4(a0)
-; RV32I-NEXT:    sw a1, 8(a0)
-; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    bnez a7, .LBB22_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    sub t1, t2, t1
+; RV32I-NEXT:    sub a6, t0, a6
+; RV32I-NEXT:    sub t0, t1, t3
+; RV32I-NEXT:    sltu t1, a6, t5
+; RV32I-NEXT:    sub t0, t0, t1
+; RV32I-NEXT:    sub a6, a6, t5
+; RV32I-NEXT:    j .LBB22_13
+; RV32I-NEXT:  .LBB22_12:
+; RV32I-NEXT:    sltu t3, a6, t0
+; RV32I-NEXT:    sub t1, t1, t2
+; RV32I-NEXT:    sub a6, a6, t0
+; RV32I-NEXT:    sub t0, t1, t3
+; RV32I-NEXT:    sltu t1, a6, t6
+; RV32I-NEXT:    sub t0, t0, t1
+; RV32I-NEXT:    sub a6, a6, t6
+; RV32I-NEXT:  .LBB22_13:
+; RV32I-NEXT:    snez t1, a6
+; RV32I-NEXT:    add t0, t0, t1
+; RV32I-NEXT:    bnez a7, .LBB22_15
+; RV32I-NEXT:  # %bb.14:
+; RV32I-NEXT:    sub a2, a2, a4
+; RV32I-NEXT:    sub a2, a2, a5
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    j .LBB22_16
+; RV32I-NEXT:  .LBB22_15:
+; RV32I-NEXT:    sub a4, a4, a2
+; RV32I-NEXT:    sub a2, a4, t4
+; RV32I-NEXT:    sub a1, a3, a1
+; RV32I-NEXT:  .LBB22_16:
+; RV32I-NEXT:    or a3, a1, a2
+; RV32I-NEXT:    neg a4, a6
+; RV32I-NEXT:    neg a5, t0
+; RV32I-NEXT:    snez a6, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    snez a3, a3
+; RV32I-NEXT:    add a2, a2, a6
+; RV32I-NEXT:    sltu a6, a4, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a4, a4, a3
+; RV32I-NEXT:    sub a3, a5, a6
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a4, 8(a0)
+; RV32I-NEXT:    sw a3, 12(a0)
+; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i128:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sltu a4, a0, a2
+; RV64I-NEXT:    sltu a4, a2, a0
 ; RV64I-NEXT:    mv a5, a4
 ; RV64I-NEXT:    beq a1, a3, .LBB22_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    slt a5, a1, a3
+; RV64I-NEXT:    slt a5, a3, a1
 ; RV64I-NEXT:  .LBB22_2:
 ; RV64I-NEXT:    bnez a5, .LBB22_4
 ; RV64I-NEXT:  # %bb.3:
-; RV64I-NEXT:    sltu a4, a2, a0
 ; RV64I-NEXT:    sub a1, a3, a1
 ; RV64I-NEXT:    sub a1, a1, a4
 ; RV64I-NEXT:    sub a0, a2, a0
-; RV64I-NEXT:    ret
+; RV64I-NEXT:    j .LBB22_5
 ; RV64I-NEXT:  .LBB22_4:
+; RV64I-NEXT:    sltu a4, a0, a2
 ; RV64I-NEXT:    sub a1, a1, a3
 ; RV64I-NEXT:    sub a1, a1, a4
 ; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:  .LBB22_5:
+; RV64I-NEXT:    snez a2, a0
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i128:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a3, 0(a2)
-; RV32ZBB-NEXT:    lw a4, 4(a2)
-; RV32ZBB-NEXT:    lw a5, 8(a2)
-; RV32ZBB-NEXT:    lw a7, 12(a2)
-; RV32ZBB-NEXT:    lw a2, 0(a1)
+; RV32ZBB-NEXT:    lw a3, 0(a1)
+; RV32ZBB-NEXT:    lw a4, 4(a1)
 ; RV32ZBB-NEXT:    lw a6, 8(a1)
 ; RV32ZBB-NEXT:    lw t1, 12(a1)
-; RV32ZBB-NEXT:    lw a1, 4(a1)
-; RV32ZBB-NEXT:    sltu t0, a6, a5
-; RV32ZBB-NEXT:    mv t4, t0
-; RV32ZBB-NEXT:    beq t1, a7, .LBB22_2
+; RV32ZBB-NEXT:    lw a1, 0(a2)
+; RV32ZBB-NEXT:    lw t0, 8(a2)
+; RV32ZBB-NEXT:    lw t2, 12(a2)
+; RV32ZBB-NEXT:    lw a2, 4(a2)
+; RV32ZBB-NEXT:    sltu t3, t0, a6
+; RV32ZBB-NEXT:    mv t4, t3
+; RV32ZBB-NEXT:    beq t1, t2, .LBB22_2
 ; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    slt t4, t1, a7
+; RV32ZBB-NEXT:    slt t4, t2, t1
 ; RV32ZBB-NEXT:  .LBB22_2:
-; RV32ZBB-NEXT:    sltu t2, a2, a3
-; RV32ZBB-NEXT:    mv t3, t2
-; RV32ZBB-NEXT:    beq a1, a4, .LBB22_4
+; RV32ZBB-NEXT:    sltu a5, a1, a3
+; RV32ZBB-NEXT:    sltu t6, a2, a4
+; RV32ZBB-NEXT:    mv a7, a5
+; RV32ZBB-NEXT:    beq a4, a2, .LBB22_4
 ; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    sltu t3, a1, a4
+; RV32ZBB-NEXT:    mv a7, t6
 ; RV32ZBB-NEXT:  .LBB22_4:
-; RV32ZBB-NEXT:    xor t5, t1, a7
-; RV32ZBB-NEXT:    xor t6, a6, a5
-; RV32ZBB-NEXT:    or t5, t6, t5
-; RV32ZBB-NEXT:    mv t6, t3
+; RV32ZBB-NEXT:    addi sp, sp, -16
+; RV32ZBB-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZBB-NEXT:    xor t5, t1, t2
+; RV32ZBB-NEXT:    xor s0, a6, t0
+; RV32ZBB-NEXT:    or t5, s0, t5
 ; RV32ZBB-NEXT:    beqz t5, .LBB22_6
 ; RV32ZBB-NEXT:  # %bb.5:
-; RV32ZBB-NEXT:    mv t6, t4
+; RV32ZBB-NEXT:    mv a7, t4
 ; RV32ZBB-NEXT:  .LBB22_6:
-; RV32ZBB-NEXT:    sltu t4, a3, a2
-; RV32ZBB-NEXT:    mv t5, t4
-; RV32ZBB-NEXT:    beq a1, a4, .LBB22_8
+; RV32ZBB-NEXT:    mv t5, a5
+; RV32ZBB-NEXT:    beq a2, a4, .LBB22_8
 ; RV32ZBB-NEXT:  # %bb.7:
-; RV32ZBB-NEXT:    sltu t5, a4, a1
+; RV32ZBB-NEXT:    mv t5, t6
 ; RV32ZBB-NEXT:  .LBB22_8:
-; RV32ZBB-NEXT:    bnez t6, .LBB22_10
+; RV32ZBB-NEXT:    sltu t4, a3, a1
+; RV32ZBB-NEXT:    mv t6, t4
+; RV32ZBB-NEXT:    beq a4, a2, .LBB22_10
 ; RV32ZBB-NEXT:  # %bb.9:
-; RV32ZBB-NEXT:    sltu t0, a5, a6
-; RV32ZBB-NEXT:    sub a7, a7, t1
-; RV32ZBB-NEXT:    sub a5, a5, a6
-; RV32ZBB-NEXT:    sub a4, a4, a1
-; RV32ZBB-NEXT:    sub a6, a7, t0
-; RV32ZBB-NEXT:    sltu a7, a5, t5
-; RV32ZBB-NEXT:    sub a1, a5, t5
-; RV32ZBB-NEXT:    sub a5, a4, t4
-; RV32ZBB-NEXT:    sub a4, a6, a7
-; RV32ZBB-NEXT:    sub a2, a3, a2
-; RV32ZBB-NEXT:    j .LBB22_11
+; RV32ZBB-NEXT:    sltu t6, a4, a2
 ; RV32ZBB-NEXT:  .LBB22_10:
-; RV32ZBB-NEXT:    sub a7, t1, a7
-; RV32ZBB-NEXT:    sub a5, a6, a5
-; RV32ZBB-NEXT:    sub a4, a1, a4
-; RV32ZBB-NEXT:    sub a6, a7, t0
-; RV32ZBB-NEXT:    sltu a7, a5, t3
-; RV32ZBB-NEXT:    sub a1, a5, t3
-; RV32ZBB-NEXT:    sub a5, a4, t2
-; RV32ZBB-NEXT:    sub a4, a6, a7
-; RV32ZBB-NEXT:    sub a2, a2, a3
-; RV32ZBB-NEXT:  .LBB22_11:
-; RV32ZBB-NEXT:    sw a2, 0(a0)
-; RV32ZBB-NEXT:    sw a5, 4(a0)
-; RV32ZBB-NEXT:    sw a1, 8(a0)
-; RV32ZBB-NEXT:    sw a4, 12(a0)
+; RV32ZBB-NEXT:    bnez a7, .LBB22_12
+; RV32ZBB-NEXT:  # %bb.11:
+; RV32ZBB-NEXT:    sub t1, t2, t1
+; RV32ZBB-NEXT:    sub a6, t0, a6
+; RV32ZBB-NEXT:    sub t0, t1, t3
+; RV32ZBB-NEXT:    sltu t1, a6, t5
+; RV32ZBB-NEXT:    sub t0, t0, t1
+; RV32ZBB-NEXT:    sub a6, a6, t5
+; RV32ZBB-NEXT:    j .LBB22_13
+; RV32ZBB-NEXT:  .LBB22_12:
+; RV32ZBB-NEXT:    sltu t3, a6, t0
+; RV32ZBB-NEXT:    sub t1, t1, t2
+; RV32ZBB-NEXT:    sub a6, a6, t0
+; RV32ZBB-NEXT:    sub t0, t1, t3
+; RV32ZBB-NEXT:    sltu t1, a6, t6
+; RV32ZBB-NEXT:    sub t0, t0, t1
+; RV32ZBB-NEXT:    sub a6, a6, t6
+; RV32ZBB-NEXT:  .LBB22_13:
+; RV32ZBB-NEXT:    snez t1, a6
+; RV32ZBB-NEXT:    add t0, t0, t1
+; RV32ZBB-NEXT:    bnez a7, .LBB22_15
+; RV32ZBB-NEXT:  # %bb.14:
+; RV32ZBB-NEXT:    sub a2, a2, a4
+; RV32ZBB-NEXT:    sub a2, a2, a5
+; RV32ZBB-NEXT:    sub a1, a1, a3
+; RV32ZBB-NEXT:    j .LBB22_16
+; RV32ZBB-NEXT:  .LBB22_15:
+; RV32ZBB-NEXT:    sub a4, a4, a2
+; RV32ZBB-NEXT:    sub a2, a4, t4
+; RV32ZBB-NEXT:    sub a1, a3, a1
+; RV32ZBB-NEXT:  .LBB22_16:
+; RV32ZBB-NEXT:    or a3, a1, a2
+; RV32ZBB-NEXT:    neg a4, a6
+; RV32ZBB-NEXT:    neg a5, t0
+; RV32ZBB-NEXT:    snez a6, a1
+; RV32ZBB-NEXT:    neg a1, a1
+; RV32ZBB-NEXT:    snez a3, a3
+; RV32ZBB-NEXT:    add a2, a2, a6
+; RV32ZBB-NEXT:    sltu a6, a4, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a4, a4, a3
+; RV32ZBB-NEXT:    sub a3, a5, a6
+; RV32ZBB-NEXT:    sw a1, 0(a0)
+; RV32ZBB-NEXT:    sw a2, 4(a0)
+; RV32ZBB-NEXT:    sw a4, 8(a0)
+; RV32ZBB-NEXT:    sw a3, 12(a0)
+; RV32ZBB-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBB-NEXT:    addi sp, sp, 16
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i128:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sltu a4, a0, a2
+; RV64ZBB-NEXT:    sltu a4, a2, a0
 ; RV64ZBB-NEXT:    mv a5, a4
 ; RV64ZBB-NEXT:    beq a1, a3, .LBB22_2
 ; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    slt a5, a1, a3
+; RV64ZBB-NEXT:    slt a5, a3, a1
 ; RV64ZBB-NEXT:  .LBB22_2:
 ; RV64ZBB-NEXT:    bnez a5, .LBB22_4
 ; RV64ZBB-NEXT:  # %bb.3:
-; RV64ZBB-NEXT:    sltu a4, a2, a0
 ; RV64ZBB-NEXT:    sub a1, a3, a1
 ; RV64ZBB-NEXT:    sub a1, a1, a4
 ; RV64ZBB-NEXT:    sub a0, a2, a0
-; RV64ZBB-NEXT:    ret
+; RV64ZBB-NEXT:    j .LBB22_5
 ; RV64ZBB-NEXT:  .LBB22_4:
+; RV64ZBB-NEXT:    sltu a4, a0, a2
 ; RV64ZBB-NEXT:    sub a1, a1, a3
 ; RV64ZBB-NEXT:    sub a1, a1, a4
 ; RV64ZBB-NEXT:    sub a0, a0, a2
+; RV64ZBB-NEXT:  .LBB22_5:
+; RV64ZBB-NEXT:    snez a2, a0
+; RV64ZBB-NEXT:    add a1, a1, a2
+; RV64ZBB-NEXT:    neg a1, a1
+; RV64ZBB-NEXT:    neg a0, a0
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp slt i128 %a, %b
   %ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll
index 713b52f53e3d9..30f41f73199c9 100644
--- a/llvm/test/CodeGen/RISCV/abdu-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll
@@ -1577,28 +1577,33 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 ;
 
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
-; NOZBB-LABEL: abd_cmp_i8:
-; NOZBB:       # %bb.0:
-; NOZBB-NEXT:    zext.b a2, a0
-; NOZBB-NEXT:    zext.b a3, a1
-; NOZBB-NEXT:    bgeu a3, a2, .LBB18_2
-; NOZBB-NEXT:  # %bb.1:
-; NOZBB-NEXT:    sub a0, a1, a0
-; NOZBB-NEXT:    ret
-; NOZBB-NEXT:  .LBB18_2:
-; NOZBB-NEXT:    sub a0, a0, a1
-; NOZBB-NEXT:    ret
+; RV32I-LABEL: abd_cmp_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    zext.b a1, a1
+; RV32I-NEXT:    zext.b a0, a0
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: abd_cmp_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    zext.b a1, a1
+; RV64I-NEXT:    zext.b a0, a0
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
 ;
 ; ZBB-LABEL: abd_cmp_i8:
 ; ZBB:       # %bb.0:
-; ZBB-NEXT:    zext.b a2, a0
-; ZBB-NEXT:    zext.b a3, a1
-; ZBB-NEXT:    bgeu a3, a2, .LBB18_2
-; ZBB-NEXT:  # %bb.1:
-; ZBB-NEXT:    sub a0, a1, a0
-; ZBB-NEXT:    ret
-; ZBB-NEXT:  .LBB18_2:
-; ZBB-NEXT:    sub a0, a0, a1
+; ZBB-NEXT:    zext.b a1, a1
+; ZBB-NEXT:    zext.b a0, a0
+; ZBB-NEXT:    maxu a2, a0, a1
+; ZBB-NEXT:    minu a0, a0, a1
+; ZBB-NEXT:    sub a0, a0, a2
 ; ZBB-NEXT:    ret
   %cmp = icmp ule i8 %a, %b
   %ab = sub i8 %a, %b
@@ -1608,30 +1613,37 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 }
 
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
-; NOZBB-LABEL: abd_cmp_i16:
-; NOZBB:       # %bb.0:
-; NOZBB-NEXT:    lui a2, 16
-; NOZBB-NEXT:    addi a2, a2, -1
-; NOZBB-NEXT:    and a3, a1, a2
-; NOZBB-NEXT:    and a2, a0, a2
-; NOZBB-NEXT:    bltu a2, a3, .LBB19_2
-; NOZBB-NEXT:  # %bb.1:
-; NOZBB-NEXT:    sub a0, a1, a0
-; NOZBB-NEXT:    ret
-; NOZBB-NEXT:  .LBB19_2:
-; NOZBB-NEXT:    sub a0, a0, a1
-; NOZBB-NEXT:    ret
+; RV32I-LABEL: abd_cmp_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: abd_cmp_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
 ;
 ; ZBB-LABEL: abd_cmp_i16:
 ; ZBB:       # %bb.0:
-; ZBB-NEXT:    zext.h a2, a1
-; ZBB-NEXT:    zext.h a3, a0
-; ZBB-NEXT:    bltu a3, a2, .LBB19_2
-; ZBB-NEXT:  # %bb.1:
-; ZBB-NEXT:    sub a0, a1, a0
-; ZBB-NEXT:    ret
-; ZBB-NEXT:  .LBB19_2:
-; ZBB-NEXT:    sub a0, a0, a1
+; ZBB-NEXT:    zext.h a1, a1
+; ZBB-NEXT:    zext.h a0, a0
+; ZBB-NEXT:    maxu a2, a0, a1
+; ZBB-NEXT:    minu a0, a0, a1
+; ZBB-NEXT:    sub a0, a0, a2
 ; ZBB-NEXT:    ret
   %cmp = icmp ult i16 %a, %b
   %ab = sub i16 %a, %b
@@ -1643,46 +1655,44 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    bgeu a0, a1, .LBB20_2
+; RV32I-NEXT:    bltu a1, a0, .LBB20_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB20_2:
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a2, a1
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    bgeu a3, a2, .LBB20_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    subw a0, a0, a1
-; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB20_2:
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i32:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    bgeu a0, a1, .LBB20_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sub a0, a0, a1
-; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB20_2:
-; RV32ZBB-NEXT:    sub a0, a1, a0
+; RV32ZBB-NEXT:    maxu a2, a0, a1
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    sub a0, a0, a2
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sext.w a2, a1
-; RV64ZBB-NEXT:    sext.w a3, a0
-; RV64ZBB-NEXT:    bgeu a3, a2, .LBB20_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    subw a0, a0, a1
-; RV64ZBB-NEXT:    ret
-; RV64ZBB-NEXT:  .LBB20_2:
-; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    slli a1, a1, 32
+; RV64ZBB-NEXT:    slli a0, a0, 32
+; RV64ZBB-NEXT:    srli a1, a1, 32
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    maxu a2, a0, a1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a2
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp uge i32 %a, %b
   %ab = sub i32 %a, %b
@@ -1695,63 +1705,72 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltu a4, a0, a2
-; RV32I-NEXT:    mv a5, a4
-; RV32I-NEXT:    beq a1, a3, .LBB21_2
+; RV32I-NEXT:    sub a3, a1, a3
+; RV32I-NEXT:    sub a3, a3, a4
+; RV32I-NEXT:    sub a2, a0, a2
+; RV32I-NEXT:    beq a3, a1, .LBB21_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu a5, a1, a3
+; RV32I-NEXT:    sltu a0, a1, a3
+; RV32I-NEXT:    j .LBB21_3
 ; RV32I-NEXT:  .LBB21_2:
-; RV32I-NEXT:    bnez a5, .LBB21_4
-; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    sltu a4, a2, a0
-; RV32I-NEXT:    sub a1, a3, a1
-; RV32I-NEXT:    sub a1, a1, a4
-; RV32I-NEXT:    sub a0, a2, a0
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB21_4:
-; RV32I-NEXT:    sub a1, a1, a3
-; RV32I-NEXT:    sub a1, a1, a4
-; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    sltu a0, a0, a2
+; RV32I-NEXT:  .LBB21_3:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    xor a2, a2, a1
+; RV32I-NEXT:    xor a3, a3, a1
+; RV32I-NEXT:    sltu a1, a2, a1
+; RV32I-NEXT:    add a3, a3, a0
+; RV32I-NEXT:    add a0, a2, a0
+; RV32I-NEXT:    sub a3, a3, a1
+; RV32I-NEXT:    snez a1, a0
+; RV32I-NEXT:    add a1, a3, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    bltu a0, a1, .LBB21_2
+; RV64I-NEXT:    bltu a1, a0, .LBB21_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ; RV64I-NEXT:  .LBB21_2:
 ; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i64:
 ; RV32ZBB:       # %bb.0:
 ; RV32ZBB-NEXT:    sltu a4, a0, a2
-; RV32ZBB-NEXT:    mv a5, a4
-; RV32ZBB-NEXT:    beq a1, a3, .LBB21_2
+; RV32ZBB-NEXT:    sub a3, a1, a3
+; RV32ZBB-NEXT:    sub a3, a3, a4
+; RV32ZBB-NEXT:    sub a2, a0, a2
+; RV32ZBB-NEXT:    beq a3, a1, .LBB21_2
 ; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sltu a5, a1, a3
+; RV32ZBB-NEXT:    sltu a0, a1, a3
+; RV32ZBB-NEXT:    j .LBB21_3
 ; RV32ZBB-NEXT:  .LBB21_2:
-; RV32ZBB-NEXT:    bnez a5, .LBB21_4
-; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    sltu a4, a2, a0
-; RV32ZBB-NEXT:    sub a1, a3, a1
-; RV32ZBB-NEXT:    sub a1, a1, a4
-; RV32ZBB-NEXT:    sub a0, a2, a0
-; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB21_4:
-; RV32ZBB-NEXT:    sub a1, a1, a3
-; RV32ZBB-NEXT:    sub a1, a1, a4
-; RV32ZBB-NEXT:    sub a0, a0, a2
+; RV32ZBB-NEXT:    sltu a0, a0, a2
+; RV32ZBB-NEXT:  .LBB21_3:
+; RV32ZBB-NEXT:    neg a1, a0
+; RV32ZBB-NEXT:    xor a2, a2, a1
+; RV32ZBB-NEXT:    xor a3, a3, a1
+; RV32ZBB-NEXT:    sltu a1, a2, a1
+; RV32ZBB-NEXT:    add a3, a3, a0
+; RV32ZBB-NEXT:    add a0, a2, a0
+; RV32ZBB-NEXT:    sub a3, a3, a1
+; RV32ZBB-NEXT:    snez a1, a0
+; RV32ZBB-NEXT:    add a1, a3, a1
+; RV32ZBB-NEXT:    neg a1, a1
+; RV32ZBB-NEXT:    neg a0, a0
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i64:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    bltu a0, a1, .LBB21_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    sub a0, a1, a0
-; RV64ZBB-NEXT:    ret
-; RV64ZBB-NEXT:  .LBB21_2:
-; RV64ZBB-NEXT:    sub a0, a0, a1
+; RV64ZBB-NEXT:    maxu a2, a0, a1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    sub a0, a0, a2
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i64 %a, %b
   %ab = sub i64 %a, %b
@@ -1763,176 +1782,226 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; RV32I-LABEL: abd_cmp_i128:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a3, 0(a2)
-; RV32I-NEXT:    lw a4, 4(a2)
-; RV32I-NEXT:    lw a5, 8(a2)
-; RV32I-NEXT:    lw a7, 12(a2)
+; RV32I-NEXT:    lw a5, 0(a2)
+; RV32I-NEXT:    lw a7, 4(a2)
+; RV32I-NEXT:    lw a3, 8(a2)
+; RV32I-NEXT:    lw t1, 12(a2)
+; RV32I-NEXT:    lw a4, 8(a1)
+; RV32I-NEXT:    lw a6, 12(a1)
 ; RV32I-NEXT:    lw a2, 0(a1)
-; RV32I-NEXT:    lw a6, 8(a1)
-; RV32I-NEXT:    lw t1, 12(a1)
-; RV32I-NEXT:    lw a1, 4(a1)
-; RV32I-NEXT:    sltu t0, a6, a5
-; RV32I-NEXT:    mv t4, t0
-; RV32I-NEXT:    beq t1, a7, .LBB22_2
+; RV32I-NEXT:    lw t0, 4(a1)
+; RV32I-NEXT:    sltu a1, a4, a3
+; RV32I-NEXT:    sub t1, a6, t1
+; RV32I-NEXT:    sltu t2, a2, a5
+; RV32I-NEXT:    sub a1, t1, a1
+; RV32I-NEXT:    mv t1, t2
+; RV32I-NEXT:    beq t0, a7, .LBB22_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu t4, t1, a7
+; RV32I-NEXT:    sltu t1, t0, a7
 ; RV32I-NEXT:  .LBB22_2:
-; RV32I-NEXT:    sltu t2, a2, a3
-; RV32I-NEXT:    mv t3, t2
-; RV32I-NEXT:    beq a1, a4, .LBB22_4
+; RV32I-NEXT:    sub a3, a4, a3
+; RV32I-NEXT:    sltu t3, a3, t1
+; RV32I-NEXT:    sub a1, a1, t3
+; RV32I-NEXT:    sub a3, a3, t1
+; RV32I-NEXT:    beq a1, a6, .LBB22_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    sltu t3, a1, a4
+; RV32I-NEXT:    sltu t1, a6, a1
+; RV32I-NEXT:    j .LBB22_5
 ; RV32I-NEXT:  .LBB22_4:
-; RV32I-NEXT:    xor t5, t1, a7
-; RV32I-NEXT:    xor t6, a6, a5
-; RV32I-NEXT:    or t5, t6, t5
-; RV32I-NEXT:    mv t6, t3
-; RV32I-NEXT:    beqz t5, .LBB22_6
-; RV32I-NEXT:  # %bb.5:
-; RV32I-NEXT:    mv t6, t4
-; RV32I-NEXT:  .LBB22_6:
-; RV32I-NEXT:    sltu t4, a3, a2
-; RV32I-NEXT:    mv t5, t4
-; RV32I-NEXT:    beq a1, a4, .LBB22_8
-; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    sltu t5, a4, a1
+; RV32I-NEXT:    sltu t1, a4, a3
+; RV32I-NEXT:  .LBB22_5:
+; RV32I-NEXT:    sub a7, t0, a7
+; RV32I-NEXT:    sub a7, a7, t2
+; RV32I-NEXT:    sub a5, a2, a5
+; RV32I-NEXT:    beq a7, t0, .LBB22_7
+; RV32I-NEXT:  # %bb.6:
+; RV32I-NEXT:    sltu a2, t0, a7
+; RV32I-NEXT:    j .LBB22_8
+; RV32I-NEXT:  .LBB22_7:
+; RV32I-NEXT:    sltu a2, a2, a5
 ; RV32I-NEXT:  .LBB22_8:
-; RV32I-NEXT:    bnez t6, .LBB22_10
+; RV32I-NEXT:    xor a6, a1, a6
+; RV32I-NEXT:    xor a4, a3, a4
+; RV32I-NEXT:    or a4, a4, a6
+; RV32I-NEXT:    beqz a4, .LBB22_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    sltu t0, a5, a6
-; RV32I-NEXT:    sub a7, a7, t1
-; RV32I-NEXT:    sub a5, a5, a6
-; RV32I-NEXT:    sub a4, a4, a1
-; RV32I-NEXT:    sub a6, a7, t0
-; RV32I-NEXT:    sltu a7, a5, t5
-; RV32I-NEXT:    sub a1, a5, t5
-; RV32I-NEXT:    sub a5, a4, t4
-; RV32I-NEXT:    sub a4, a6, a7
-; RV32I-NEXT:    sub a2, a3, a2
-; RV32I-NEXT:    j .LBB22_11
+; RV32I-NEXT:    mv a2, t1
 ; RV32I-NEXT:  .LBB22_10:
-; RV32I-NEXT:    sub a7, t1, a7
-; RV32I-NEXT:    sub a5, a6, a5
-; RV32I-NEXT:    sub a4, a1, a4
-; RV32I-NEXT:    sub a6, a7, t0
-; RV32I-NEXT:    sltu a7, a5, t3
-; RV32I-NEXT:    sub a1, a5, t3
-; RV32I-NEXT:    sub a5, a4, t2
-; RV32I-NEXT:    sub a4, a6, a7
-; RV32I-NEXT:    sub a2, a2, a3
-; RV32I-NEXT:  .LBB22_11:
-; RV32I-NEXT:    sw a2, 0(a0)
-; RV32I-NEXT:    sw a5, 4(a0)
-; RV32I-NEXT:    sw a1, 8(a0)
-; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    neg a4, a2
+; RV32I-NEXT:    xor t0, a5, a4
+; RV32I-NEXT:    xor t3, a7, a4
+; RV32I-NEXT:    sltu a5, t0, a4
+; RV32I-NEXT:    add a6, t3, a2
+; RV32I-NEXT:    add t0, t0, a2
+; RV32I-NEXT:    sub t1, a6, a5
+; RV32I-NEXT:    snez a6, t1
+; RV32I-NEXT:    snez t2, t0
+; RV32I-NEXT:    or a6, t2, a6
+; RV32I-NEXT:    beqz a7, .LBB22_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    sltu a5, t3, a4
+; RV32I-NEXT:  .LBB22_12:
+; RV32I-NEXT:    xor a3, a3, a4
+; RV32I-NEXT:    xor a1, a1, a4
+; RV32I-NEXT:    add t1, t1, t2
+; RV32I-NEXT:    neg a7, t0
+; RV32I-NEXT:    add t0, a3, a2
+; RV32I-NEXT:    sltu a3, a3, a4
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a2, t1
+; RV32I-NEXT:    sub a4, t0, a5
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    sltu a3, t0, a5
+; RV32I-NEXT:    neg a5, a4
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    snez a3, a4
+; RV32I-NEXT:    sltu a4, a5, a6
+; RV32I-NEXT:    add a1, a1, a3
+; RV32I-NEXT:    sub a3, a5, a6
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    sub a1, a1, a4
+; RV32I-NEXT:    sw a7, 0(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw a1, 12(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: abd_cmp_i128:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a4, a0, a2
-; RV64I-NEXT:    mv a5, a4
-; RV64I-NEXT:    beq a1, a3, .LBB22_2
+; RV64I-NEXT:    sub a3, a1, a3
+; RV64I-NEXT:    sub a3, a3, a4
+; RV64I-NEXT:    sub a2, a0, a2
+; RV64I-NEXT:    beq a3, a1, .LBB22_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    sltu a5, a1, a3
+; RV64I-NEXT:    sltu a0, a1, a3
+; RV64I-NEXT:    j .LBB22_3
 ; RV64I-NEXT:  .LBB22_2:
-; RV64I-NEXT:    bnez a5, .LBB22_4
-; RV64I-NEXT:  # %bb.3:
-; RV64I-NEXT:    sltu a4, a2, a0
-; RV64I-NEXT:    sub a1, a3, a1
-; RV64I-NEXT:    sub a1, a1, a4
-; RV64I-NEXT:    sub a0, a2, a0
-; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB22_4:
-; RV64I-NEXT:    sub a1, a1, a3
-; RV64I-NEXT:    sub a1, a1, a4
-; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    sltu a0, a0, a2
+; RV64I-NEXT:  .LBB22_3:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    xor a2, a2, a1
+; RV64I-NEXT:    xor a3, a3, a1
+; RV64I-NEXT:    sltu a1, a2, a1
+; RV64I-NEXT:    add a3, a3, a0
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    sub a3, a3, a1
+; RV64I-NEXT:    snez a1, a0
+; RV64I-NEXT:    add a1, a3, a1
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    neg a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abd_cmp_i128:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a3, 0(a2)
-; RV32ZBB-NEXT:    lw a4, 4(a2)
-; RV32ZBB-NEXT:    lw a5, 8(a2)
-; RV32ZBB-NEXT:    lw a7, 12(a2)
+; RV32ZBB-NEXT:    lw a5, 0(a2)
+; RV32ZBB-NEXT:    lw a7, 4(a2)
+; RV32ZBB-NEXT:    lw a3, 8(a2)
+; RV32ZBB-NEXT:    lw t1, 12(a2)
+; RV32ZBB-NEXT:    lw a4, 8(a1)
+; RV32ZBB-NEXT:    lw a6, 12(a1)
 ; RV32ZBB-NEXT:    lw a2, 0(a1)
-; RV32ZBB-NEXT:    lw a6, 8(a1)
-; RV32ZBB-NEXT:    lw t1, 12(a1)
-; RV32ZBB-NEXT:    lw a1, 4(a1)
-; RV32ZBB-NEXT:    sltu t0, a6, a5
-; RV32ZBB-NEXT:    mv t4, t0
-; RV32ZBB-NEXT:    beq t1, a7, .LBB22_2
+; RV32ZBB-NEXT:    lw t0, 4(a1)
+; RV32ZBB-NEXT:    sltu a1, a4, a3
+; RV32ZBB-NEXT:    sub t1, a6, t1
+; RV32ZBB-NEXT:    sltu t2, a2, a5
+; RV32ZBB-NEXT:    sub a1, t1, a1
+; RV32ZBB-NEXT:    mv t1, t2
+; RV32ZBB-NEXT:    beq t0, a7, .LBB22_2
 ; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sltu t4, t1, a7
+; RV32ZBB-NEXT:    sltu t1, t0, a7
 ; RV32ZBB-NEXT:  .LBB22_2:
-; RV32ZBB-NEXT:    sltu t2, a2, a3
-; RV32ZBB-NEXT:    mv t3, t2
-; RV32ZBB-NEXT:    beq a1, a4, .LBB22_4
+; RV32ZBB-NEXT:    sub a3, a4, a3
+; RV32ZBB-NEXT:    sltu t3, a3, t1
+; RV32ZBB-NEXT:    sub a1, a1, t3
+; RV32ZBB-NEXT:    sub a3, a3, t1
+; RV32ZBB-NEXT:    beq a1, a6, .LBB22_4
 ; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    sltu t3, a1, a4
+; RV32ZBB-NEXT:    sltu t1, a6, a1
+; RV32ZBB-NEXT:    j .LBB22_5
 ; RV32ZBB-NEXT:  .LBB22_4:
-; RV32ZBB-NEXT:    xor t5, t1, a7
-; RV32ZBB-NEXT:    xor t6, a6, a5
-; RV32ZBB-NEXT:    or t5, t6, t5
-; RV32ZBB-NEXT:    mv t6, t3
-; RV32ZBB-NEXT:    beqz t5, .LBB22_6
-; RV32ZBB-NEXT:  # %bb.5:
-; RV32ZBB-NEXT:    mv t6, t4
-; RV32ZBB-NEXT:  .LBB22_6:
-; RV32ZBB-NEXT:    sltu t4, a3, a2
-; RV32ZBB-NEXT:    mv t5, t4
-; RV32ZBB-NEXT:    beq a1, a4, .LBB22_8
-; RV32ZBB-NEXT:  # %bb.7:
-; RV32ZBB-NEXT:    sltu t5, a4, a1
+; RV32ZBB-NEXT:    sltu t1, a4, a3
+; RV32ZBB-NEXT:  .LBB22_5:
+; RV32ZBB-NEXT:    sub a7, t0, a7
+; RV32ZBB-NEXT:    sub a7, a7, t2
+; RV32ZBB-NEXT:    sub a5, a2, a5
+; RV32ZBB-NEXT:    beq a7, t0, .LBB22_7
+; RV32ZBB-NEXT:  # %bb.6:
+; RV32ZBB-NEXT:    sltu a2, t0, a7
+; RV32ZBB-NEXT:    j .LBB22_8
+; RV32ZBB-NEXT:  .LBB22_7:
+; RV32ZBB-NEXT:    sltu a2, a2, a5
 ; RV32ZBB-NEXT:  .LBB22_8:
-; RV32ZBB-NEXT:    bnez t6, .LBB22_10
+; RV32ZBB-NEXT:    xor a6, a1, a6
+; RV32ZBB-NEXT:    xor a4, a3, a4
+; RV32ZBB-NEXT:    or a4, a4, a6
+; RV32ZBB-NEXT:    beqz a4, .LBB22_10
 ; RV32ZBB-NEXT:  # %bb.9:
-; RV32ZBB-NEXT:    sltu t0, a5, a6
-; RV32ZBB-NEXT:    sub a7, a7, t1
-; RV32ZBB-NEXT:    sub a5, a5, a6
-; RV32ZBB-NEXT:    sub a4, a4, a1
-; RV32ZBB-NEXT:    sub a6, a7, t0
-; RV32ZBB-NEXT:    sltu a7, a5, t5
-; RV32ZBB-NEXT:    sub a1, a5, t5
-; RV32ZBB-NEXT:    sub a5, a4, t4
-; RV32ZBB-NEXT:    sub a4, a6, a7
-; RV32ZBB-NEXT:    sub a2, a3, a2
-; RV32ZBB-NEXT:    j .LBB22_11
+; RV32ZBB-NEXT:    mv a2, t1
 ; RV32ZBB-NEXT:  .LBB22_10:
-; RV32ZBB-NEXT:    sub a7, t1, a7
-; RV32ZBB-NEXT:    sub a5, a6, a5
-; RV32ZBB-NEXT:    sub a4, a1, a4
-; RV32ZBB-NEXT:    sub a6, a7, t0
-; RV32ZBB-NEXT:    sltu a7, a5, t3
-; RV32ZBB-NEXT:    sub a1, a5, t3
-; RV32ZBB-NEXT:    sub a5, a4, t2
-; RV32ZBB-NEXT:    sub a4, a6, a7
-; RV32ZBB-NEXT:    sub a2, a2, a3
-; RV32ZBB-NEXT:  .LBB22_11:
-; RV32ZBB-NEXT:    sw a2, 0(a0)
-; RV32ZBB-NEXT:    sw a5, 4(a0)
-; RV32ZBB-NEXT:    sw a1, 8(a0)
-; RV32ZBB-NEXT:    sw a4, 12(a0)
+; RV32ZBB-NEXT:    neg a4, a2
+; RV32ZBB-NEXT:    xor t0, a5, a4
+; RV32ZBB-NEXT:    xor t3, a7, a4
+; RV32ZBB-NEXT:    sltu a5, t0, a4
+; RV32ZBB-NEXT:    add a6, t3, a2
+; RV32ZBB-NEXT:    add t0, t0, a2
+; RV32ZBB-NEXT:    sub t1, a6, a5
+; RV32ZBB-NEXT:    snez a6, t1
+; RV32ZBB-NEXT:    snez t2, t0
+; RV32ZBB-NEXT:    or a6, t2, a6
+; RV32ZBB-NEXT:    beqz a7, .LBB22_12
+; RV32ZBB-NEXT:  # %bb.11:
+; RV32ZBB-NEXT:    sltu a5, t3, a4
+; RV32ZBB-NEXT:  .LBB22_12:
+; RV32ZBB-NEXT:    xor a3, a3, a4
+; RV32ZBB-NEXT:    xor a1, a1, a4
+; RV32ZBB-NEXT:    add t1, t1, t2
+; RV32ZBB-NEXT:    neg a7, t0
+; RV32ZBB-NEXT:    add t0, a3, a2
+; RV32ZBB-NEXT:    sltu a3, a3, a4
+; RV32ZBB-NEXT:    add a1, a1, a2
+; RV32ZBB-NEXT:    neg a2, t1
+; RV32ZBB-NEXT:    sub a4, t0, a5
+; RV32ZBB-NEXT:    sub a1, a1, a3
+; RV32ZBB-NEXT:    sltu a3, t0, a5
+; RV32ZBB-NEXT:    neg a5, a4
+; RV32ZBB-NEXT:    sub a1, a1, a3
+; RV32ZBB-NEXT:    snez a3, a4
+; RV32ZBB-NEXT:    sltu a4, a5, a6
+; RV32ZBB-NEXT:    add a1, a1, a3
+; RV32ZBB-NEXT:    sub a3, a5, a6
+; RV32ZBB-NEXT:    neg a1, a1
+; RV32ZBB-NEXT:    sub a1, a1, a4
+; RV32ZBB-NEXT:    sw a7, 0(a0)
+; RV32ZBB-NEXT:    sw a2, 4(a0)
+; RV32ZBB-NEXT:    sw a3, 8(a0)
+; RV32ZBB-NEXT:    sw a1, 12(a0)
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abd_cmp_i128:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    sltu a4, a0, a2
-; RV64ZBB-NEXT:    mv a5, a4
-; RV64ZBB-NEXT:    beq a1, a3, .LBB22_2
+; RV64ZBB-NEXT:    sub a3, a1, a3
+; RV64ZBB-NEXT:    sub a3, a3, a4
+; RV64ZBB-NEXT:    sub a2, a0, a2
+; RV64ZBB-NEXT:    beq a3, a1, .LBB22_2
 ; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    sltu a5, a1, a3
+; RV64ZBB-NEXT:    sltu a0, a1, a3
+; RV64ZBB-NEXT:    j .LBB22_3
 ; RV64ZBB-NEXT:  .LBB22_2:
-; RV64ZBB-NEXT:    bnez a5, .LBB22_4
-; RV64ZBB-NEXT:  # %bb.3:
-; RV64ZBB-NEXT:    sltu a4, a2, a0
-; RV64ZBB-NEXT:    sub a1, a3, a1
-; RV64ZBB-NEXT:    sub a1, a1, a4
-; RV64ZBB-NEXT:    sub a0, a2, a0
-; RV64ZBB-NEXT:    ret
-; RV64ZBB-NEXT:  .LBB22_4:
-; RV64ZBB-NEXT:    sub a1, a1, a3
-; RV64ZBB-NEXT:    sub a1, a1, a4
-; RV64ZBB-NEXT:    sub a0, a0, a2
+; RV64ZBB-NEXT:    sltu a0, a0, a2
+; RV64ZBB-NEXT:  .LBB22_3:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    xor a2, a2, a1
+; RV64ZBB-NEXT:    xor a3, a3, a1
+; RV64ZBB-NEXT:    sltu a1, a2, a1
+; RV64ZBB-NEXT:    add a3, a3, a0
+; RV64ZBB-NEXT:    add a0, a2, a0
+; RV64ZBB-NEXT:    sub a3, a3, a1
+; RV64ZBB-NEXT:    snez a1, a0
+; RV64ZBB-NEXT:    add a1, a3, a1
+; RV64ZBB-NEXT:    neg a1, a1
+; RV64ZBB-NEXT:    neg a0, a0
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i128 %a, %b
   %ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index d9064c684cb20..b5f270d4024b3 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -828,17 +828,20 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl %edx, %edi
 ; X86-NEXT:    subl %eax, %edi
 ; X86-NEXT:    movl %esi, %ebx
-; X86-NEXT:    sbbl %edx, %ebx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    sbbl %esi, %edx
-; X86-NEXT:    cmovgel %edi, %eax
-; X86-NEXT:    cmovgel %ebx, %edx
+; X86-NEXT:    sbbl %ecx, %ebx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    cmovll %ebx, %ecx
+; X86-NEXT:    cmovll %edi, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -868,34 +871,42 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movl 24(%ebp), %ecx
-; X86-NEXT:    movl 28(%ebp), %edx
-; X86-NEXT:    movl 40(%ebp), %eax
-; X86-NEXT:    movl 44(%ebp), %esi
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl 40(%ebp), %ecx
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl 24(%ebp), %edx
+; X86-NEXT:    movl 28(%ebp), %esi
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 48(%ebp), %edx
+; X86-NEXT:    movl 32(%ebp), %ebx
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    movl 52(%ebp), %esi
+; X86-NEXT:    movl 36(%ebp), %edi
+; X86-NEXT:    sbbl %esi, %edi
+; X86-NEXT:    subl 24(%ebp), %ecx
+; X86-NEXT:    sbbl 28(%ebp), %eax
+; X86-NEXT:    sbbl 32(%ebp), %edx
+; X86-NEXT:    sbbl 36(%ebp), %esi
+; X86-NEXT:    cmovll %edi, %esi
+; X86-NEXT:    cmovll %ebx, %edx
+; X86-NEXT:    cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:    sbbl %edx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 32(%ebp), %esi
-; X86-NEXT:    movl 48(%ebp), %edi
 ; X86-NEXT:    sbbl %esi, %edi
-; X86-NEXT:    movl 36(%ebp), %ebx
-; X86-NEXT:    movl 52(%ebp), %eax
-; X86-NEXT:    sbbl %ebx, %eax
-; X86-NEXT:    subl 40(%ebp), %ecx
-; X86-NEXT:    sbbl 44(%ebp), %edx
-; X86-NEXT:    sbbl 48(%ebp), %esi
-; X86-NEXT:    sbbl 52(%ebp), %ebx
-; X86-NEXT:    cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    cmovgel %edi, %esi
-; X86-NEXT:    cmovgel %eax, %ebx
-; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    movl %ebx, 12(%eax)
-; X86-NEXT:    movl %esi, 8(%eax)
-; X86-NEXT:    movl %edx, 4(%eax)
-; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl 8(%ebp), %edx
+; X86-NEXT:    movl %ecx, (%edx)
+; X86-NEXT:    movl %ebx, 4(%edx)
+; X86-NEXT:    movl %eax, 8(%edx)
+; X86-NEXT:    movl %edi, 12(%edx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    leal -12(%ebp), %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
@@ -905,15 +916,17 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ;
 ; X64-LABEL: abd_cmp_i128:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdx, %rax
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    movq %rcx, %r8
-; X64-NEXT:    sbbq %rsi, %r8
-; X64-NEXT:    subq %rdx, %rdi
-; X64-NEXT:    sbbq %rcx, %rsi
-; X64-NEXT:    cmovlq %rdi, %rax
-; X64-NEXT:    cmovlq %rsi, %r8
-; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    sbbq %rcx, %r8
+; X64-NEXT:    subq %rdi, %rdx
+; X64-NEXT:    sbbq %rsi, %rcx
+; X64-NEXT:    cmovlq %r8, %rcx
+; X64-NEXT:    cmovgeq %rdx, %rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    negq %rax
+; X64-NEXT:    sbbq %rcx, %rdx
 ; X64-NEXT:    retq
   %cmp = icmp slt i128 %a, %b
   %ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index b9e01fda29615..1403f4f03ef44 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -958,20 +958,18 @@ define i32 @abdu_select(i32 %x, i32 %y) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    subl %eax, %edx
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    cmovbl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abdu_select:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    cmoval %edi, %eax
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    cmovael %esi, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 %x, %y
   %cmp = icmp ugt i32 %x, %y


