[llvm] [DAGCombiner] Add pattern matching for negated subtraction in ABDU selection (PR #156217)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 05:49:57 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/156217
From 679ddf32452d13dc3ea6b1022237c2270ca04115 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 30 Aug 2025 21:28:20 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/X86/abdu.ll | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 043c9155f52f9..b9e01fda29615 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -953,6 +953,33 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
ret i128 %sub
}
+define i32 @abdu_select(i32 %x, i32 %y) {
+; X86-LABEL: abdu_select:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbel %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: abdu_select:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: subl %esi, %edi
+; X64-NEXT: cmoval %edi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 %x, %y
+ %cmp = icmp ugt i32 %x, %y
+ %sub1 = sub i32 0, %sub
+ %cond = select i1 %cmp, i32 %sub, i32 %sub1
+ ret i32 %cond
+}
+
declare i8 @llvm.abs.i8(i8, i1)
declare i16 @llvm.abs.i16(i16, i1)
declare i32 @llvm.abs.i32(i32, i1)
From c61539c5f1c38e56395e99aaf4b5372bd26ceac1 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 30 Aug 2025 21:37:39 -0400
Subject: [PATCH 2/2] [DAGCombiner] Add pattern matching for negated
subtraction in ABDU selection
select(ugt(x, y), sub(x, y), sub(0, sub(x, y))) -> abdu(x, y)
This holds because negating the difference, -(x - y), is the same as y - x, so the false arm still yields the unsigned absolute difference.
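For reference, a minimal IR sketch of the shape the combine now recognizes (it mirrors the abdu_select test added in the pre-commit patch; the function and value names here are illustrative, and whether an ISD::ABDU node is ultimately emitted still depends on target support):

  define i32 @abdu_select_sketch(i32 %x, i32 %y) {
    %sub  = sub i32 %x, %y                        ; x - y
    %neg  = sub i32 0, %sub                       ; -(x - y), i.e. y - x
    %cmp  = icmp ugt i32 %x, %y
    %cond = select i1 %cmp, i32 %sub, i32 %neg    ; ugt ? x - y : y - x
    ret i32 %cond
  }

The existing fold only matched the sub(x, y) / sub(y, x) pair of select arms; the new cases additionally accept the negated form sub(0, sub(x, y)) for the opposite arm.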
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 +-
llvm/test/CodeGen/AArch64/abds-neg.ll | 30 +-
llvm/test/CodeGen/AArch64/abdu-neg.ll | 30 +-
llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll | 7 +-
llvm/test/CodeGen/RISCV/abds-neg.ll | 447 ++++++++-------
llvm/test/CodeGen/RISCV/abdu-neg.ll | 539 ++++++++++--------
llvm/test/CodeGen/X86/abds-neg.ll | 97 ++--
llvm/test/CodeGen/X86/abdu.ll | 12 +-
8 files changed, 679 insertions(+), 533 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..8ede62dd4f3d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12284,27 +12284,45 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGT:
- case ISD::SETUGE:
- if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
- sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
- return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
- if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
- sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
- hasOperation(ABDOpc, VT))
- return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+ case ISD::SETUGE: {
+ if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) {
+ if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+
+ if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS)))))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ }
+
+ if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) {
+ if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+
+ if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS)))))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+ }
break;
+ }
case ISD::SETLT:
case ISD::SETLE:
case ISD::SETULT:
- case ISD::SETULE:
- if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
- sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
- return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
- if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
- sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
- hasOperation(ABDOpc, VT))
- return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+ case ISD::SETULE: {
+ if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) {
+ if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+
+ if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS)))))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ }
+
+ if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) {
+ if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+
+ if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS)))))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+ }
break;
+ }
default:
break;
}
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 37319642f5b34..c3624d2ba69af 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: sub w10, w1, w0
-; CHECK-NEXT: cmp w8, w1, sxtb
-; CHECK-NEXT: csel w0, w9, w10, le
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp sle i8 %a, %b
%ab = sub i8 %a, %b
@@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: sub w10, w1, w0
-; CHECK-NEXT: cmp w8, w1, sxth
-; CHECK-NEXT: csel w0, w9, w10, lt
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp slt i16 %a, %b
%ab = sub i16 %a, %b
@@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: cneg w0, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: cneg x0, x8, gt
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
@@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_cmp_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x2, x0
-; CHECK-NEXT: sbc x9, x3, x1
-; CHECK-NEXT: subs x10, x0, x2
-; CHECK-NEXT: sbcs x11, x1, x3
-; CHECK-NEXT: csel x0, x10, x8, lt
-; CHECK-NEXT: csel x1, x11, x9, lt
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x8, x8, x10, lt
+; CHECK-NEXT: csel x9, x9, x11, lt
+; CHECK-NEXT: negs x0, x8
+; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..e541f39d40ed5 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: sub w10, w1, w0
-; CHECK-NEXT: cmp w8, w1, uxtb
-; CHECK-NEXT: csel w0, w9, w10, ls
+; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp ule i8 %a, %b
%ab = sub i8 %a, %b
@@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: sub w10, w1, w0
-; CHECK-NEXT: cmp w8, w1, uxth
-; CHECK-NEXT: csel w0, w9, w10, lo
+; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp ult i16 %a, %b
%ab = sub i16 %a, %b
@@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: cneg w0, w8, hi
; CHECK-NEXT: ret
%cmp = icmp uge i32 %a, %b
%ab = sub i32 %a, %b
@@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
@@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_cmp_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x2, x0
-; CHECK-NEXT: sbc x9, x3, x1
-; CHECK-NEXT: subs x10, x0, x2
-; CHECK-NEXT: sbcs x11, x1, x3
-; CHECK-NEXT: csel x0, x10, x8, lo
-; CHECK-NEXT: csel x1, x11, x9, lo
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: csel x9, x9, x11, lo
+; CHECK-NEXT: negs x0, x8
+; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index eaab932c41df7..7489ceacb7a17 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -966,10 +966,9 @@ define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_ugt_opp:
; CHECK-PWR78: # %bb.0:
-; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
-; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
-; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
-; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
+; CHECK-PWR78-NEXT: vmaxuw v4, v2, v3
+; CHECK-PWR78-NEXT: vminuw v2, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index 41f73f51fe7b6..e4ab26a6e6cac 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -1646,42 +1646,35 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_cmp_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a2, a0, 24
-; RV32I-NEXT: slli a3, a1, 24
-; RV32I-NEXT: srai a2, a2, 24
-; RV32I-NEXT: srai a3, a3, 24
-; RV32I-NEXT: bge a3, a2, .LBB18_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sub a0, a1, a0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai a1, a1, 24
+; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a2, a0, 56
-; RV64I-NEXT: slli a3, a1, 56
-; RV64I-NEXT: srai a2, a2, 56
-; RV64I-NEXT: srai a3, a3, 56
-; RV64I-NEXT: bge a3, a2, .LBB18_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: sub a0, a1, a0
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB18_2:
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a1, a1, 56
+; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i8:
; ZBB: # %bb.0:
-; ZBB-NEXT: sext.b a2, a0
-; ZBB-NEXT: sext.b a3, a1
-; ZBB-NEXT: bge a3, a2, .LBB18_2
-; ZBB-NEXT: # %bb.1:
-; ZBB-NEXT: sub a0, a1, a0
-; ZBB-NEXT: ret
-; ZBB-NEXT: .LBB18_2:
-; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.b a1, a1
+; ZBB-NEXT: sext.b a0, a0
+; ZBB-NEXT: max a2, a0, a1
+; ZBB-NEXT: min a0, a0, a1
+; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
%cmp = icmp sle i8 %a, %b
%ab = sub i8 %a, %b
@@ -1693,42 +1686,35 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_cmp_i16:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a2, a1, 16
-; RV32I-NEXT: slli a3, a0, 16
-; RV32I-NEXT: srai a2, a2, 16
-; RV32I-NEXT: srai a3, a3, 16
-; RV32I-NEXT: blt a3, a2, .LBB19_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sub a0, a1, a0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB19_2:
+; RV32I-NEXT: slli a1, a1, 16
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai a1, a1, 16
+; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a2, a1, 48
-; RV64I-NEXT: slli a3, a0, 48
-; RV64I-NEXT: srai a2, a2, 48
-; RV64I-NEXT: srai a3, a3, 48
-; RV64I-NEXT: blt a3, a2, .LBB19_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: sub a0, a1, a0
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB19_2:
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a1, a1, 48
+; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i16:
; ZBB: # %bb.0:
-; ZBB-NEXT: sext.h a2, a1
-; ZBB-NEXT: sext.h a3, a0
-; ZBB-NEXT: blt a3, a2, .LBB19_2
-; ZBB-NEXT: # %bb.1:
-; ZBB-NEXT: sub a0, a1, a0
-; ZBB-NEXT: ret
-; ZBB-NEXT: .LBB19_2:
-; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.h a1, a1
+; ZBB-NEXT: sext.h a0, a0
+; ZBB-NEXT: max a2, a0, a1
+; ZBB-NEXT: min a0, a0, a1
+; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
%cmp = icmp slt i16 %a, %b
%ab = sub i16 %a, %b
@@ -1740,46 +1726,40 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_cmp_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: bge a0, a1, .LBB20_2
+; RV32I-NEXT: blt a1, a0, .LBB20_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB20_2:
-; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a1
-; RV64I-NEXT: sext.w a3, a0
-; RV64I-NEXT: bge a3, a2, .LBB20_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a0, a0, a1
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB20_2:
+; RV64I-NEXT: sext.w a1, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i32:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: bge a0, a1, .LBB20_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB20_2:
-; RV32ZBB-NEXT: sub a0, a1, a0
+; RV32ZBB-NEXT: max a2, a0, a1
+; RV32ZBB-NEXT: min a0, a0, a1
+; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: sext.w a2, a1
-; RV64ZBB-NEXT: sext.w a3, a0
-; RV64ZBB-NEXT: bge a3, a2, .LBB20_2
-; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: ret
-; RV64ZBB-NEXT: .LBB20_2:
-; RV64ZBB-NEXT: subw a0, a1, a0
+; RV64ZBB-NEXT: sext.w a1, a1
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: max a2, a0, a1
+; RV64ZBB-NEXT: min a0, a0, a1
+; RV64ZBB-NEXT: subw a0, a0, a2
; RV64ZBB-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -1791,64 +1771,73 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_cmp_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB21_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt a5, a1, a3
+; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB21_2:
; RV32I-NEXT: bnez a5, .LBB21_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB21_5
; RV32I-NEXT: .LBB21_4:
+; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: .LBB21_5:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a0, a1, .LBB21_2
+; RV64I-NEXT: blt a1, a0, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: sltu a4, a0, a2
+; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB21_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: slt a5, a1, a3
+; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB21_2:
; RV32ZBB-NEXT: bnez a5, .LBB21_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
-; RV32ZBB-NEXT: ret
+; RV32ZBB-NEXT: j .LBB21_5
; RV32ZBB-NEXT: .LBB21_4:
+; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
+; RV32ZBB-NEXT: .LBB21_5:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: add a1, a1, a2
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i64:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: blt a0, a1, .LBB21_2
-; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: sub a0, a1, a0
-; RV64ZBB-NEXT: ret
-; RV64ZBB-NEXT: .LBB21_2:
-; RV64ZBB-NEXT: sub a0, a0, a1
+; RV64ZBB-NEXT: max a2, a0, a1
+; RV64ZBB-NEXT: min a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
@@ -1860,176 +1849,240 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
-; RV32I-NEXT: lw a5, 8(a2)
-; RV32I-NEXT: lw a7, 12(a2)
-; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t1, 12(a1)
-; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu t0, a6, a5
-; RV32I-NEXT: mv t4, t0
-; RV32I-NEXT: beq t1, a7, .LBB22_2
+; RV32I-NEXT: lw a1, 0(a2)
+; RV32I-NEXT: lw t0, 8(a2)
+; RV32I-NEXT: lw t2, 12(a2)
+; RV32I-NEXT: lw a2, 4(a2)
+; RV32I-NEXT: sltu t3, t0, a6
+; RV32I-NEXT: mv t4, t3
+; RV32I-NEXT: beq t1, t2, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt t4, t1, a7
+; RV32I-NEXT: slt t4, t2, t1
; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: sltu t2, a2, a3
-; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: beq a1, a4, .LBB22_4
+; RV32I-NEXT: sltu a5, a1, a3
+; RV32I-NEXT: sltu t6, a2, a4
+; RV32I-NEXT: mv a7, a5
+; RV32I-NEXT: beq a4, a2, .LBB22_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t3, a1, a4
+; RV32I-NEXT: mv a7, t6
; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: xor t5, t1, a7
-; RV32I-NEXT: xor t6, a6, a5
-; RV32I-NEXT: or t5, t6, t5
-; RV32I-NEXT: mv t6, t3
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: xor t5, t1, t2
+; RV32I-NEXT: xor s0, a6, t0
+; RV32I-NEXT: or t5, s0, t5
; RV32I-NEXT: beqz t5, .LBB22_6
; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: mv t6, t4
+; RV32I-NEXT: mv a7, t4
; RV32I-NEXT: .LBB22_6:
-; RV32I-NEXT: sltu t4, a3, a2
-; RV32I-NEXT: mv t5, t4
-; RV32I-NEXT: beq a1, a4, .LBB22_8
+; RV32I-NEXT: mv t5, a5
+; RV32I-NEXT: beq a2, a4, .LBB22_8
; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: sltu t5, a4, a1
+; RV32I-NEXT: mv t5, t6
; RV32I-NEXT: .LBB22_8:
-; RV32I-NEXT: bnez t6, .LBB22_10
+; RV32I-NEXT: sltu t4, a3, a1
+; RV32I-NEXT: mv t6, t4
+; RV32I-NEXT: beq a4, a2, .LBB22_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t0, a5, a6
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: sub a5, a5, a6
-; RV32I-NEXT: sub a4, a4, a1
-; RV32I-NEXT: sub a6, a7, t0
-; RV32I-NEXT: sltu a7, a5, t5
-; RV32I-NEXT: sub a1, a5, t5
-; RV32I-NEXT: sub a5, a4, t4
-; RV32I-NEXT: sub a4, a6, a7
-; RV32I-NEXT: sub a2, a3, a2
-; RV32I-NEXT: j .LBB22_11
+; RV32I-NEXT: sltu t6, a4, a2
; RV32I-NEXT: .LBB22_10:
-; RV32I-NEXT: sub a7, t1, a7
-; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: sub a4, a1, a4
-; RV32I-NEXT: sub a6, a7, t0
-; RV32I-NEXT: sltu a7, a5, t3
-; RV32I-NEXT: sub a1, a5, t3
-; RV32I-NEXT: sub a5, a4, t2
-; RV32I-NEXT: sub a4, a6, a7
-; RV32I-NEXT: sub a2, a2, a3
-; RV32I-NEXT: .LBB22_11:
-; RV32I-NEXT: sw a2, 0(a0)
-; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: bnez a7, .LBB22_12
+; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: sub t1, t2, t1
+; RV32I-NEXT: sub a6, t0, a6
+; RV32I-NEXT: sub t0, t1, t3
+; RV32I-NEXT: sltu t1, a6, t5
+; RV32I-NEXT: sub t0, t0, t1
+; RV32I-NEXT: sub a6, a6, t5
+; RV32I-NEXT: j .LBB22_13
+; RV32I-NEXT: .LBB22_12:
+; RV32I-NEXT: sltu t3, a6, t0
+; RV32I-NEXT: sub t1, t1, t2
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: sub t0, t1, t3
+; RV32I-NEXT: sltu t1, a6, t6
+; RV32I-NEXT: sub t0, t0, t1
+; RV32I-NEXT: sub a6, a6, t6
+; RV32I-NEXT: .LBB22_13:
+; RV32I-NEXT: snez t1, a6
+; RV32I-NEXT: add t0, t0, t1
+; RV32I-NEXT: bnez a7, .LBB22_15
+; RV32I-NEXT: # %bb.14:
+; RV32I-NEXT: sub a2, a2, a4
+; RV32I-NEXT: sub a2, a2, a5
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: j .LBB22_16
+; RV32I-NEXT: .LBB22_15:
+; RV32I-NEXT: sub a4, a4, a2
+; RV32I-NEXT: sub a2, a4, t4
+; RV32I-NEXT: sub a1, a3, a1
+; RV32I-NEXT: .LBB22_16:
+; RV32I-NEXT: or a3, a1, a2
+; RV32I-NEXT: neg a4, a6
+; RV32I-NEXT: neg a5, t0
+; RV32I-NEXT: snez a6, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: snez a3, a3
+; RV32I-NEXT: add a2, a2, a6
+; RV32I-NEXT: sltu a6, a4, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a4, a4, a3
+; RV32I-NEXT: sub a3, a5, a6
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a3, 12(a0)
+; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
; RV64I: # %bb.0:
-; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB22_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slt a5, a1, a3
+; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: bnez a5, .LBB22_4
; RV64I-NEXT: # %bb.3:
-; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
-; RV64I-NEXT: ret
+; RV64I-NEXT: j .LBB22_5
; RV64I-NEXT: .LBB22_4:
+; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: .LBB22_5:
+; RV64I-NEXT: snez a2, a0
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
-; RV32ZBB-NEXT: lw a5, 8(a2)
-; RV32ZBB-NEXT: lw a7, 12(a2)
-; RV32ZBB-NEXT: lw a2, 0(a1)
+; RV32ZBB-NEXT: lw a3, 0(a1)
+; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t1, 12(a1)
-; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu t0, a6, a5
-; RV32ZBB-NEXT: mv t4, t0
-; RV32ZBB-NEXT: beq t1, a7, .LBB22_2
+; RV32ZBB-NEXT: lw a1, 0(a2)
+; RV32ZBB-NEXT: lw t0, 8(a2)
+; RV32ZBB-NEXT: lw t2, 12(a2)
+; RV32ZBB-NEXT: lw a2, 4(a2)
+; RV32ZBB-NEXT: sltu t3, t0, a6
+; RV32ZBB-NEXT: mv t4, t3
+; RV32ZBB-NEXT: beq t1, t2, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: slt t4, t1, a7
+; RV32ZBB-NEXT: slt t4, t2, t1
; RV32ZBB-NEXT: .LBB22_2:
-; RV32ZBB-NEXT: sltu t2, a2, a3
-; RV32ZBB-NEXT: mv t3, t2
-; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
+; RV32ZBB-NEXT: sltu a5, a1, a3
+; RV32ZBB-NEXT: sltu t6, a2, a4
+; RV32ZBB-NEXT: mv a7, a5
+; RV32ZBB-NEXT: beq a4, a2, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t3, a1, a4
+; RV32ZBB-NEXT: mv a7, t6
; RV32ZBB-NEXT: .LBB22_4:
-; RV32ZBB-NEXT: xor t5, t1, a7
-; RV32ZBB-NEXT: xor t6, a6, a5
-; RV32ZBB-NEXT: or t5, t6, t5
-; RV32ZBB-NEXT: mv t6, t3
+; RV32ZBB-NEXT: addi sp, sp, -16
+; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZBB-NEXT: xor t5, t1, t2
+; RV32ZBB-NEXT: xor s0, a6, t0
+; RV32ZBB-NEXT: or t5, s0, t5
; RV32ZBB-NEXT: beqz t5, .LBB22_6
; RV32ZBB-NEXT: # %bb.5:
-; RV32ZBB-NEXT: mv t6, t4
+; RV32ZBB-NEXT: mv a7, t4
; RV32ZBB-NEXT: .LBB22_6:
-; RV32ZBB-NEXT: sltu t4, a3, a2
-; RV32ZBB-NEXT: mv t5, t4
-; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
+; RV32ZBB-NEXT: mv t5, a5
+; RV32ZBB-NEXT: beq a2, a4, .LBB22_8
; RV32ZBB-NEXT: # %bb.7:
-; RV32ZBB-NEXT: sltu t5, a4, a1
+; RV32ZBB-NEXT: mv t5, t6
; RV32ZBB-NEXT: .LBB22_8:
-; RV32ZBB-NEXT: bnez t6, .LBB22_10
+; RV32ZBB-NEXT: sltu t4, a3, a1
+; RV32ZBB-NEXT: mv t6, t4
+; RV32ZBB-NEXT: beq a4, a2, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t0, a5, a6
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: sub a5, a5, a6
-; RV32ZBB-NEXT: sub a4, a4, a1
-; RV32ZBB-NEXT: sub a6, a7, t0
-; RV32ZBB-NEXT: sltu a7, a5, t5
-; RV32ZBB-NEXT: sub a1, a5, t5
-; RV32ZBB-NEXT: sub a5, a4, t4
-; RV32ZBB-NEXT: sub a4, a6, a7
-; RV32ZBB-NEXT: sub a2, a3, a2
-; RV32ZBB-NEXT: j .LBB22_11
+; RV32ZBB-NEXT: sltu t6, a4, a2
; RV32ZBB-NEXT: .LBB22_10:
-; RV32ZBB-NEXT: sub a7, t1, a7
-; RV32ZBB-NEXT: sub a5, a6, a5
-; RV32ZBB-NEXT: sub a4, a1, a4
-; RV32ZBB-NEXT: sub a6, a7, t0
-; RV32ZBB-NEXT: sltu a7, a5, t3
-; RV32ZBB-NEXT: sub a1, a5, t3
-; RV32ZBB-NEXT: sub a5, a4, t2
-; RV32ZBB-NEXT: sub a4, a6, a7
-; RV32ZBB-NEXT: sub a2, a2, a3
-; RV32ZBB-NEXT: .LBB22_11:
-; RV32ZBB-NEXT: sw a2, 0(a0)
-; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a4, 12(a0)
+; RV32ZBB-NEXT: bnez a7, .LBB22_12
+; RV32ZBB-NEXT: # %bb.11:
+; RV32ZBB-NEXT: sub t1, t2, t1
+; RV32ZBB-NEXT: sub a6, t0, a6
+; RV32ZBB-NEXT: sub t0, t1, t3
+; RV32ZBB-NEXT: sltu t1, a6, t5
+; RV32ZBB-NEXT: sub t0, t0, t1
+; RV32ZBB-NEXT: sub a6, a6, t5
+; RV32ZBB-NEXT: j .LBB22_13
+; RV32ZBB-NEXT: .LBB22_12:
+; RV32ZBB-NEXT: sltu t3, a6, t0
+; RV32ZBB-NEXT: sub t1, t1, t2
+; RV32ZBB-NEXT: sub a6, a6, t0
+; RV32ZBB-NEXT: sub t0, t1, t3
+; RV32ZBB-NEXT: sltu t1, a6, t6
+; RV32ZBB-NEXT: sub t0, t0, t1
+; RV32ZBB-NEXT: sub a6, a6, t6
+; RV32ZBB-NEXT: .LBB22_13:
+; RV32ZBB-NEXT: snez t1, a6
+; RV32ZBB-NEXT: add t0, t0, t1
+; RV32ZBB-NEXT: bnez a7, .LBB22_15
+; RV32ZBB-NEXT: # %bb.14:
+; RV32ZBB-NEXT: sub a2, a2, a4
+; RV32ZBB-NEXT: sub a2, a2, a5
+; RV32ZBB-NEXT: sub a1, a1, a3
+; RV32ZBB-NEXT: j .LBB22_16
+; RV32ZBB-NEXT: .LBB22_15:
+; RV32ZBB-NEXT: sub a4, a4, a2
+; RV32ZBB-NEXT: sub a2, a4, t4
+; RV32ZBB-NEXT: sub a1, a3, a1
+; RV32ZBB-NEXT: .LBB22_16:
+; RV32ZBB-NEXT: or a3, a1, a2
+; RV32ZBB-NEXT: neg a4, a6
+; RV32ZBB-NEXT: neg a5, t0
+; RV32ZBB-NEXT: snez a6, a1
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: snez a3, a3
+; RV32ZBB-NEXT: add a2, a2, a6
+; RV32ZBB-NEXT: sltu a6, a4, a3
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a4, a4, a3
+; RV32ZBB-NEXT: sub a3, a5, a6
+; RV32ZBB-NEXT: sw a1, 0(a0)
+; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
+; RV32ZBB-NEXT: sw a3, 12(a0)
+; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: sltu a4, a0, a2
+; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB22_2
; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: slt a5, a1, a3
+; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB22_2:
; RV64ZBB-NEXT: bnez a5, .LBB22_4
; RV64ZBB-NEXT: # %bb.3:
-; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
-; RV64ZBB-NEXT: ret
+; RV64ZBB-NEXT: j .LBB22_5
; RV64ZBB-NEXT: .LBB22_4:
+; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
+; RV64ZBB-NEXT: .LBB22_5:
+; RV64ZBB-NEXT: snez a2, a0
+; RV64ZBB-NEXT: add a1, a1, a2
+; RV64ZBB-NEXT: neg a1, a1
+; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll
index 713b52f53e3d9..30f41f73199c9 100644
--- a/llvm/test/CodeGen/RISCV/abdu-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll
@@ -1577,28 +1577,33 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
;
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
-; NOZBB-LABEL: abd_cmp_i8:
-; NOZBB: # %bb.0:
-; NOZBB-NEXT: zext.b a2, a0
-; NOZBB-NEXT: zext.b a3, a1
-; NOZBB-NEXT: bgeu a3, a2, .LBB18_2
-; NOZBB-NEXT: # %bb.1:
-; NOZBB-NEXT: sub a0, a1, a0
-; NOZBB-NEXT: ret
-; NOZBB-NEXT: .LBB18_2:
-; NOZBB-NEXT: sub a0, a0, a1
-; NOZBB-NEXT: ret
+; RV32I-LABEL: abd_cmp_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a1, a1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: abd_cmp_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i8:
; ZBB: # %bb.0:
-; ZBB-NEXT: zext.b a2, a0
-; ZBB-NEXT: zext.b a3, a1
-; ZBB-NEXT: bgeu a3, a2, .LBB18_2
-; ZBB-NEXT: # %bb.1:
-; ZBB-NEXT: sub a0, a1, a0
-; ZBB-NEXT: ret
-; ZBB-NEXT: .LBB18_2:
-; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: zext.b a1, a1
+; ZBB-NEXT: zext.b a0, a0
+; ZBB-NEXT: maxu a2, a0, a1
+; ZBB-NEXT: minu a0, a0, a1
+; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
%cmp = icmp ule i8 %a, %b
%ab = sub i8 %a, %b
@@ -1608,30 +1613,37 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
}
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
-; NOZBB-LABEL: abd_cmp_i16:
-; NOZBB: # %bb.0:
-; NOZBB-NEXT: lui a2, 16
-; NOZBB-NEXT: addi a2, a2, -1
-; NOZBB-NEXT: and a3, a1, a2
-; NOZBB-NEXT: and a2, a0, a2
-; NOZBB-NEXT: bltu a2, a3, .LBB19_2
-; NOZBB-NEXT: # %bb.1:
-; NOZBB-NEXT: sub a0, a1, a0
-; NOZBB-NEXT: ret
-; NOZBB-NEXT: .LBB19_2:
-; NOZBB-NEXT: sub a0, a0, a1
-; NOZBB-NEXT: ret
+; RV32I-LABEL: abd_cmp_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: abd_cmp_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i16:
; ZBB: # %bb.0:
-; ZBB-NEXT: zext.h a2, a1
-; ZBB-NEXT: zext.h a3, a0
-; ZBB-NEXT: bltu a3, a2, .LBB19_2
-; ZBB-NEXT: # %bb.1:
-; ZBB-NEXT: sub a0, a1, a0
-; ZBB-NEXT: ret
-; ZBB-NEXT: .LBB19_2:
-; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: zext.h a1, a1
+; ZBB-NEXT: zext.h a0, a0
+; ZBB-NEXT: maxu a2, a0, a1
+; ZBB-NEXT: minu a0, a0, a1
+; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
%cmp = icmp ult i16 %a, %b
%ab = sub i16 %a, %b
@@ -1643,46 +1655,44 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_cmp_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a0, a1, .LBB20_2
+; RV32I-NEXT: bltu a1, a0, .LBB20_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB20_2:
-; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a1
-; RV64I-NEXT: sext.w a3, a0
-; RV64I-NEXT: bgeu a3, a2, .LBB20_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a0, a0, a1
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB20_2:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i32:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: bgeu a0, a1, .LBB20_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB20_2:
-; RV32ZBB-NEXT: sub a0, a1, a0
+; RV32ZBB-NEXT: maxu a2, a0, a1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: sext.w a2, a1
-; RV64ZBB-NEXT: sext.w a3, a0
-; RV64ZBB-NEXT: bgeu a3, a2, .LBB20_2
-; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: ret
-; RV64ZBB-NEXT: .LBB20_2:
-; RV64ZBB-NEXT: subw a0, a1, a0
+; RV64ZBB-NEXT: slli a1, a1, 32
+; RV64ZBB-NEXT: slli a0, a0, 32
+; RV64ZBB-NEXT: srli a1, a1, 32
+; RV64ZBB-NEXT: srli a0, a0, 32
+; RV64ZBB-NEXT: maxu a2, a0, a1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: subw a0, a0, a2
; RV64ZBB-NEXT: ret
%cmp = icmp uge i32 %a, %b
%ab = sub i32 %a, %b
@@ -1695,63 +1705,72 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_cmp_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a0, a2
-; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: beq a1, a3, .LBB21_2
+; RV32I-NEXT: sub a3, a1, a3
+; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a2, a0, a2
+; RV32I-NEXT: beq a3, a1, .LBB21_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a5, a1, a3
+; RV32I-NEXT: sltu a0, a1, a3
+; RV32I-NEXT: j .LBB21_3
; RV32I-NEXT: .LBB21_2:
-; RV32I-NEXT: bnez a5, .LBB21_4
-; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: sub a1, a3, a1
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: sub a0, a2, a0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB21_4:
-; RV32I-NEXT: sub a1, a1, a3
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sltu a0, a0, a2
+; RV32I-NEXT: .LBB21_3:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: xor a2, a2, a1
+; RV32I-NEXT: xor a3, a3, a1
+; RV32I-NEXT: sltu a1, a2, a1
+; RV32I-NEXT: add a3, a3, a0
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: sub a3, a3, a1
+; RV32I-NEXT: snez a1, a0
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a0, a1, .LBB21_2
+; RV64I-NEXT: bltu a1, a0, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a0, a2
-; RV32ZBB-NEXT: mv a5, a4
-; RV32ZBB-NEXT: beq a1, a3, .LBB21_2
+; RV32ZBB-NEXT: sub a3, a1, a3
+; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a2, a0, a2
+; RV32ZBB-NEXT: beq a3, a1, .LBB21_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu a5, a1, a3
+; RV32ZBB-NEXT: sltu a0, a1, a3
+; RV32ZBB-NEXT: j .LBB21_3
; RV32ZBB-NEXT: .LBB21_2:
-; RV32ZBB-NEXT: bnez a5, .LBB21_4
-; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu a4, a2, a0
-; RV32ZBB-NEXT: sub a1, a3, a1
-; RV32ZBB-NEXT: sub a1, a1, a4
-; RV32ZBB-NEXT: sub a0, a2, a0
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB21_4:
-; RV32ZBB-NEXT: sub a1, a1, a3
-; RV32ZBB-NEXT: sub a1, a1, a4
-; RV32ZBB-NEXT: sub a0, a0, a2
+; RV32ZBB-NEXT: sltu a0, a0, a2
+; RV32ZBB-NEXT: .LBB21_3:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: xor a2, a2, a1
+; RV32ZBB-NEXT: xor a3, a3, a1
+; RV32ZBB-NEXT: sltu a1, a2, a1
+; RV32ZBB-NEXT: add a3, a3, a0
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: sub a3, a3, a1
+; RV32ZBB-NEXT: snez a1, a0
+; RV32ZBB-NEXT: add a1, a3, a1
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i64:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: bltu a0, a1, .LBB21_2
-; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: sub a0, a1, a0
-; RV64ZBB-NEXT: ret
-; RV64ZBB-NEXT: .LBB21_2:
-; RV64ZBB-NEXT: sub a0, a0, a1
+; RV64ZBB-NEXT: maxu a2, a0, a1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
@@ -1763,176 +1782,226 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
-; RV32I-NEXT: lw a5, 8(a2)
-; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a5, 0(a2)
+; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a3, 8(a2)
+; RV32I-NEXT: lw t1, 12(a2)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: lw a6, 12(a1)
; RV32I-NEXT: lw a2, 0(a1)
-; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw t1, 12(a1)
-; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu t0, a6, a5
-; RV32I-NEXT: mv t4, t0
-; RV32I-NEXT: beq t1, a7, .LBB22_2
+; RV32I-NEXT: lw t0, 4(a1)
+; RV32I-NEXT: sltu a1, a4, a3
+; RV32I-NEXT: sub t1, a6, t1
+; RV32I-NEXT: sltu t2, a2, a5
+; RV32I-NEXT: sub a1, t1, a1
+; RV32I-NEXT: mv t1, t2
+; RV32I-NEXT: beq t0, a7, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t4, t1, a7
+; RV32I-NEXT: sltu t1, t0, a7
; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: sltu t2, a2, a3
-; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: beq a1, a4, .LBB22_4
+; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sltu t3, a3, t1
+; RV32I-NEXT: sub a1, a1, t3
+; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: beq a1, a6, .LBB22_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t3, a1, a4
+; RV32I-NEXT: sltu t1, a6, a1
+; RV32I-NEXT: j .LBB22_5
; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: xor t5, t1, a7
-; RV32I-NEXT: xor t6, a6, a5
-; RV32I-NEXT: or t5, t6, t5
-; RV32I-NEXT: mv t6, t3
-; RV32I-NEXT: beqz t5, .LBB22_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: mv t6, t4
-; RV32I-NEXT: .LBB22_6:
-; RV32I-NEXT: sltu t4, a3, a2
-; RV32I-NEXT: mv t5, t4
-; RV32I-NEXT: beq a1, a4, .LBB22_8
-; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: sltu t5, a4, a1
+; RV32I-NEXT: sltu t1, a4, a3
+; RV32I-NEXT: .LBB22_5:
+; RV32I-NEXT: sub a7, t0, a7
+; RV32I-NEXT: sub a7, a7, t2
+; RV32I-NEXT: sub a5, a2, a5
+; RV32I-NEXT: beq a7, t0, .LBB22_7
+; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: sltu a2, t0, a7
+; RV32I-NEXT: j .LBB22_8
+; RV32I-NEXT: .LBB22_7:
+; RV32I-NEXT: sltu a2, a2, a5
; RV32I-NEXT: .LBB22_8:
-; RV32I-NEXT: bnez t6, .LBB22_10
+; RV32I-NEXT: xor a6, a1, a6
+; RV32I-NEXT: xor a4, a3, a4
+; RV32I-NEXT: or a4, a4, a6
+; RV32I-NEXT: beqz a4, .LBB22_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t0, a5, a6
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: sub a5, a5, a6
-; RV32I-NEXT: sub a4, a4, a1
-; RV32I-NEXT: sub a6, a7, t0
-; RV32I-NEXT: sltu a7, a5, t5
-; RV32I-NEXT: sub a1, a5, t5
-; RV32I-NEXT: sub a5, a4, t4
-; RV32I-NEXT: sub a4, a6, a7
-; RV32I-NEXT: sub a2, a3, a2
-; RV32I-NEXT: j .LBB22_11
+; RV32I-NEXT: mv a2, t1
; RV32I-NEXT: .LBB22_10:
-; RV32I-NEXT: sub a7, t1, a7
-; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: sub a4, a1, a4
-; RV32I-NEXT: sub a6, a7, t0
-; RV32I-NEXT: sltu a7, a5, t3
-; RV32I-NEXT: sub a1, a5, t3
-; RV32I-NEXT: sub a5, a4, t2
-; RV32I-NEXT: sub a4, a6, a7
-; RV32I-NEXT: sub a2, a2, a3
-; RV32I-NEXT: .LBB22_11:
-; RV32I-NEXT: sw a2, 0(a0)
-; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: xor t0, a5, a4
+; RV32I-NEXT: xor t3, a7, a4
+; RV32I-NEXT: sltu a5, t0, a4
+; RV32I-NEXT: add a6, t3, a2
+; RV32I-NEXT: add t0, t0, a2
+; RV32I-NEXT: sub t1, a6, a5
+; RV32I-NEXT: snez a6, t1
+; RV32I-NEXT: snez t2, t0
+; RV32I-NEXT: or a6, t2, a6
+; RV32I-NEXT: beqz a7, .LBB22_12
+; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: sltu a5, t3, a4
+; RV32I-NEXT: .LBB22_12:
+; RV32I-NEXT: xor a3, a3, a4
+; RV32I-NEXT: xor a1, a1, a4
+; RV32I-NEXT: add t1, t1, t2
+; RV32I-NEXT: neg a7, t0
+; RV32I-NEXT: add t0, a3, a2
+; RV32I-NEXT: sltu a3, a3, a4
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a2, t1
+; RV32I-NEXT: sub a4, t0, a5
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: sltu a3, t0, a5
+; RV32I-NEXT: neg a5, a4
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: snez a3, a4
+; RV32I-NEXT: sltu a4, a5, a6
+; RV32I-NEXT: add a1, a1, a3
+; RV32I-NEXT: sub a3, a5, a6
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: sw a7, 0(a0)
+; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a0, a2
-; RV64I-NEXT: mv a5, a4
-; RV64I-NEXT: beq a1, a3, .LBB22_2
+; RV64I-NEXT: sub a3, a1, a3
+; RV64I-NEXT: sub a3, a3, a4
+; RV64I-NEXT: sub a2, a0, a2
+; RV64I-NEXT: beq a3, a1, .LBB22_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: sltu a5, a1, a3
+; RV64I-NEXT: sltu a0, a1, a3
+; RV64I-NEXT: j .LBB22_3
; RV64I-NEXT: .LBB22_2:
-; RV64I-NEXT: bnez a5, .LBB22_4
-; RV64I-NEXT: # %bb.3:
-; RV64I-NEXT: sltu a4, a2, a0
-; RV64I-NEXT: sub a1, a3, a1
-; RV64I-NEXT: sub a1, a1, a4
-; RV64I-NEXT: sub a0, a2, a0
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB22_4:
-; RV64I-NEXT: sub a1, a1, a3
-; RV64I-NEXT: sub a1, a1, a4
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sltu a0, a0, a2
+; RV64I-NEXT: .LBB22_3:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: xor a2, a2, a1
+; RV64I-NEXT: xor a3, a3, a1
+; RV64I-NEXT: sltu a1, a2, a1
+; RV64I-NEXT: add a3, a3, a0
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: sub a3, a3, a1
+; RV64I-NEXT: snez a1, a0
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
-; RV32ZBB-NEXT: lw a5, 8(a2)
-; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
+; RV32ZBB-NEXT: lw a7, 4(a2)
+; RV32ZBB-NEXT: lw a3, 8(a2)
+; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a4, 8(a1)
+; RV32ZBB-NEXT: lw a6, 12(a1)
; RV32ZBB-NEXT: lw a2, 0(a1)
-; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw t1, 12(a1)
-; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu t0, a6, a5
-; RV32ZBB-NEXT: mv t4, t0
-; RV32ZBB-NEXT: beq t1, a7, .LBB22_2
+; RV32ZBB-NEXT: lw t0, 4(a1)
+; RV32ZBB-NEXT: sltu a1, a4, a3
+; RV32ZBB-NEXT: sub t1, a6, t1
+; RV32ZBB-NEXT: sltu t2, a2, a5
+; RV32ZBB-NEXT: sub a1, t1, a1
+; RV32ZBB-NEXT: mv t1, t2
+; RV32ZBB-NEXT: beq t0, a7, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t4, t1, a7
+; RV32ZBB-NEXT: sltu t1, t0, a7
; RV32ZBB-NEXT: .LBB22_2:
-; RV32ZBB-NEXT: sltu t2, a2, a3
-; RV32ZBB-NEXT: mv t3, t2
-; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
+; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sltu t3, a3, t1
+; RV32ZBB-NEXT: sub a1, a1, t3
+; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: beq a1, a6, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t3, a1, a4
+; RV32ZBB-NEXT: sltu t1, a6, a1
+; RV32ZBB-NEXT: j .LBB22_5
; RV32ZBB-NEXT: .LBB22_4:
-; RV32ZBB-NEXT: xor t5, t1, a7
-; RV32ZBB-NEXT: xor t6, a6, a5
-; RV32ZBB-NEXT: or t5, t6, t5
-; RV32ZBB-NEXT: mv t6, t3
-; RV32ZBB-NEXT: beqz t5, .LBB22_6
-; RV32ZBB-NEXT: # %bb.5:
-; RV32ZBB-NEXT: mv t6, t4
-; RV32ZBB-NEXT: .LBB22_6:
-; RV32ZBB-NEXT: sltu t4, a3, a2
-; RV32ZBB-NEXT: mv t5, t4
-; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
-; RV32ZBB-NEXT: # %bb.7:
-; RV32ZBB-NEXT: sltu t5, a4, a1
+; RV32ZBB-NEXT: sltu t1, a4, a3
+; RV32ZBB-NEXT: .LBB22_5:
+; RV32ZBB-NEXT: sub a7, t0, a7
+; RV32ZBB-NEXT: sub a7, a7, t2
+; RV32ZBB-NEXT: sub a5, a2, a5
+; RV32ZBB-NEXT: beq a7, t0, .LBB22_7
+; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: sltu a2, t0, a7
+; RV32ZBB-NEXT: j .LBB22_8
+; RV32ZBB-NEXT: .LBB22_7:
+; RV32ZBB-NEXT: sltu a2, a2, a5
; RV32ZBB-NEXT: .LBB22_8:
-; RV32ZBB-NEXT: bnez t6, .LBB22_10
+; RV32ZBB-NEXT: xor a6, a1, a6
+; RV32ZBB-NEXT: xor a4, a3, a4
+; RV32ZBB-NEXT: or a4, a4, a6
+; RV32ZBB-NEXT: beqz a4, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t0, a5, a6
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: sub a5, a5, a6
-; RV32ZBB-NEXT: sub a4, a4, a1
-; RV32ZBB-NEXT: sub a6, a7, t0
-; RV32ZBB-NEXT: sltu a7, a5, t5
-; RV32ZBB-NEXT: sub a1, a5, t5
-; RV32ZBB-NEXT: sub a5, a4, t4
-; RV32ZBB-NEXT: sub a4, a6, a7
-; RV32ZBB-NEXT: sub a2, a3, a2
-; RV32ZBB-NEXT: j .LBB22_11
+; RV32ZBB-NEXT: mv a2, t1
; RV32ZBB-NEXT: .LBB22_10:
-; RV32ZBB-NEXT: sub a7, t1, a7
-; RV32ZBB-NEXT: sub a5, a6, a5
-; RV32ZBB-NEXT: sub a4, a1, a4
-; RV32ZBB-NEXT: sub a6, a7, t0
-; RV32ZBB-NEXT: sltu a7, a5, t3
-; RV32ZBB-NEXT: sub a1, a5, t3
-; RV32ZBB-NEXT: sub a5, a4, t2
-; RV32ZBB-NEXT: sub a4, a6, a7
-; RV32ZBB-NEXT: sub a2, a2, a3
-; RV32ZBB-NEXT: .LBB22_11:
-; RV32ZBB-NEXT: sw a2, 0(a0)
-; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a4, 12(a0)
+; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: xor t0, a5, a4
+; RV32ZBB-NEXT: xor t3, a7, a4
+; RV32ZBB-NEXT: sltu a5, t0, a4
+; RV32ZBB-NEXT: add a6, t3, a2
+; RV32ZBB-NEXT: add t0, t0, a2
+; RV32ZBB-NEXT: sub t1, a6, a5
+; RV32ZBB-NEXT: snez a6, t1
+; RV32ZBB-NEXT: snez t2, t0
+; RV32ZBB-NEXT: or a6, t2, a6
+; RV32ZBB-NEXT: beqz a7, .LBB22_12
+; RV32ZBB-NEXT: # %bb.11:
+; RV32ZBB-NEXT: sltu a5, t3, a4
+; RV32ZBB-NEXT: .LBB22_12:
+; RV32ZBB-NEXT: xor a3, a3, a4
+; RV32ZBB-NEXT: xor a1, a1, a4
+; RV32ZBB-NEXT: add t1, t1, t2
+; RV32ZBB-NEXT: neg a7, t0
+; RV32ZBB-NEXT: add t0, a3, a2
+; RV32ZBB-NEXT: sltu a3, a3, a4
+; RV32ZBB-NEXT: add a1, a1, a2
+; RV32ZBB-NEXT: neg a2, t1
+; RV32ZBB-NEXT: sub a4, t0, a5
+; RV32ZBB-NEXT: sub a1, a1, a3
+; RV32ZBB-NEXT: sltu a3, t0, a5
+; RV32ZBB-NEXT: neg a5, a4
+; RV32ZBB-NEXT: sub a1, a1, a3
+; RV32ZBB-NEXT: snez a3, a4
+; RV32ZBB-NEXT: sltu a4, a5, a6
+; RV32ZBB-NEXT: add a1, a1, a3
+; RV32ZBB-NEXT: sub a3, a5, a6
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: sub a1, a1, a4
+; RV32ZBB-NEXT: sw a7, 0(a0)
+; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a0, a2
-; RV64ZBB-NEXT: mv a5, a4
-; RV64ZBB-NEXT: beq a1, a3, .LBB22_2
+; RV64ZBB-NEXT: sub a3, a1, a3
+; RV64ZBB-NEXT: sub a3, a3, a4
+; RV64ZBB-NEXT: sub a2, a0, a2
+; RV64ZBB-NEXT: beq a3, a1, .LBB22_2
; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: sltu a5, a1, a3
+; RV64ZBB-NEXT: sltu a0, a1, a3
+; RV64ZBB-NEXT: j .LBB22_3
; RV64ZBB-NEXT: .LBB22_2:
-; RV64ZBB-NEXT: bnez a5, .LBB22_4
-; RV64ZBB-NEXT: # %bb.3:
-; RV64ZBB-NEXT: sltu a4, a2, a0
-; RV64ZBB-NEXT: sub a1, a3, a1
-; RV64ZBB-NEXT: sub a1, a1, a4
-; RV64ZBB-NEXT: sub a0, a2, a0
-; RV64ZBB-NEXT: ret
-; RV64ZBB-NEXT: .LBB22_4:
-; RV64ZBB-NEXT: sub a1, a1, a3
-; RV64ZBB-NEXT: sub a1, a1, a4
-; RV64ZBB-NEXT: sub a0, a0, a2
+; RV64ZBB-NEXT: sltu a0, a0, a2
+; RV64ZBB-NEXT: .LBB22_3:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: xor a2, a2, a1
+; RV64ZBB-NEXT: xor a3, a3, a1
+; RV64ZBB-NEXT: sltu a1, a2, a1
+; RV64ZBB-NEXT: add a3, a3, a0
+; RV64ZBB-NEXT: add a0, a2, a0
+; RV64ZBB-NEXT: sub a3, a3, a1
+; RV64ZBB-NEXT: snez a1, a0
+; RV64ZBB-NEXT: add a1, a3, a1
+; RV64ZBB-NEXT: neg a1, a1
+; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index d9064c684cb20..b5f270d4024b3 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -828,17 +828,20 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: cmovgel %edi, %eax
-; X86-NEXT: cmovgel %ebx, %edx
+; X86-NEXT: sbbl %ecx, %ebx
+; X86-NEXT: subl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: cmovll %ebx, %ecx
+; X86-NEXT: cmovll %edi, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: negl %eax
+; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -868,34 +871,42 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 24(%ebp), %ecx
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: movl 40(%ebp), %eax
-; X86-NEXT: movl 44(%ebp), %esi
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: movl 48(%ebp), %edi
; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl 36(%ebp), %ebx
-; X86-NEXT: movl 52(%ebp), %eax
-; X86-NEXT: sbbl %ebx, %eax
-; X86-NEXT: subl 40(%ebp), %ecx
-; X86-NEXT: sbbl 44(%ebp), %edx
-; X86-NEXT: sbbl 48(%ebp), %esi
-; X86-NEXT: sbbl 52(%ebp), %ebx
-; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovgel %edi, %esi
-; X86-NEXT: cmovgel %eax, %ebx
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -905,15 +916,17 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
;
; X64-LABEL: abd_cmp_i128:
; X64: # %bb.0:
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: movq %rcx, %r8
-; X64-NEXT: sbbq %rsi, %r8
-; X64-NEXT: subq %rdx, %rdi
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: cmovlq %rdi, %rax
-; X64-NEXT: cmovlq %rsi, %r8
-; X64-NEXT: movq %r8, %rdx
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: subq %rdx, %rax
+; X64-NEXT: movq %rsi, %r8
+; X64-NEXT: sbbq %rcx, %r8
+; X64-NEXT: subq %rdi, %rdx
+; X64-NEXT: sbbq %rsi, %rcx
+; X64-NEXT: cmovlq %r8, %rcx
+; X64-NEXT: cmovgeq %rdx, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: negq %rax
+; X64-NEXT: sbbq %rcx, %rdx
; X64-NEXT: retq
%cmp = icmp slt i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index b9e01fda29615..1403f4f03ef44 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -958,20 +958,18 @@ define i32 @abdu_select(i32 %x, i32 %y) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovbel %edx, %eax
+; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abdu_select:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: cmoval %edi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %x, %y
%cmp = icmp ugt i32 %x, %y