[llvm] [AArch64] Improve lowering of scalar abs(sub(a, b)). (PR #151180)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 31 09:00:49 PDT 2025


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/151180

>From c68c3429514f990bc17063cea81f4401ca6821d1 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Mon, 28 Jul 2025 07:32:49 -0700
Subject: [PATCH 1/6] [AArch64] Improve lowering of scalar abs(sub(a, b)).

This patch avoids a comparison against zero when lowering abs(sub(a, b))
patterns, instead reusing the condition codes generated by a subs of the
operands directly.

For example, currently:
```
  sxtb w8, w0
  sub w8, w8, w1, sxtb
  cmp w8, #0
  cneg w0, w8, pl
```
becomes:
```
  sxtb w8, w0
  sxtb w9, w1
  subs w8, w9, w8
  cneg w0, w8, gt
```

Whilst this doesn't decrease the number of instructions utilised, the
new version exposes more ILP and uses ``cheaper'' instructions,
typically having lower latency and/or higher throughput.

This patch also includes a somewhat orthogonal change in
performNegCSelCombine, which I included to avoid a code generation
regression in CodeGen/AArch64/abd[su]-neg.ll due to the combine
negating the operands of the csel, leading to an extra sub instruction.
If preferable, I can open a separate PR for this.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 33 ++++++---
 llvm/test/CodeGen/AArch64/abds-neg.ll         | 30 ++++----
 llvm/test/CodeGen/AArch64/abds.ll             | 74 +++++++++----------
 llvm/test/CodeGen/AArch64/abdu-neg.ll         | 30 ++++----
 llvm/test/CodeGen/AArch64/abdu.ll             | 74 +++++++++----------
 5 files changed, 128 insertions(+), 113 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f9471ca4aa6a..1b98df3d802de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7118,12 +7118,21 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
 
   SDLoc DL(Op);
-  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
-                            Op.getOperand(0));
-  // Generate SUBS & CSEL.
-  SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
-                            Op.getOperand(0), DAG.getConstant(0, DL, VT));
-  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
+  SDValue Val = Op.getOperand(0);
+  SDValue Neg = DAG.getNegative(Val, DL, VT);
+  SDValue Cmp;
+
+  // For abs(sub(lhs, rhs)), we can compare lhs and rhs directly. This allows
+  // reusing the subs operation for the calculation and comparison.
+  if (Val.getOpcode() == ISD::SUB)
+    Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+                      Val.getOperand(0), Val.getOperand(1));
+  else
+    // Otherwise, compare with zero.
+    Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Val,
+                      DAG.getConstant(0, DL, VT));
+
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Val, Neg,
                      DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
                      Cmp.getValue(1));
 }
@@ -20851,11 +20860,17 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
   if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
     return SDValue();
 
-  SDValue N0N = getNegatedInteger(N0, DAG);
-  SDValue N1N = getNegatedInteger(N1, DAG);
-
   SDLoc DL(N);
   EVT VT = CSel.getValueType();
+
+  // If the operands are negations of each other, reverse them.
+  if ((isNegatedInteger(N0) && N0.getOperand(1) == N1) ||
+      (isNegatedInteger(N1) && N1.getOperand(1) == N0))
+    return DAG.getNode(AArch64ISD::CSEL, DL, VT, N1, N0, CSel.getOperand(2),
+                       CSel.getOperand(3));
+
+  SDValue N0N = getNegatedInteger(N0, DAG);
+  SDValue N1N = getNegatedInteger(N1, DAG);
   return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
                      CSel.getOperand(3));
 }
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 75247823ee793..045edeb8aaac9 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -8,9 +8,9 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -92,9 +92,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bbdb116851710..cad82f2c7114a 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -8,9 +8,9 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -87,9 +87,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -214,9 +214,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -228,9 +228,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -286,9 +286,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i8 %a, %b
@@ -301,9 +301,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sge i16 %a, %b
@@ -507,11 +507,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 ; CHECK-LABEL: vector_legalized:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    addp d0, v0.2d
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w8, w8, mi
+; CHECK-NEXT:    addp d0, v0.2d
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    add x0, x9, x8
 ; CHECK-NEXT:    ret
@@ -532,9 +532,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_select_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxtb w8, w1
+; CHECK-NEXT:    sxtb w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i8 %a, %b
@@ -547,9 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_select_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    sub w8, w8, w1, sxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    sxth w8, w1
+; CHECK-NEXT:    sxth w9, w0
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sle i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index d07f099a536ab..8f79a46ba7801 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -8,9 +8,9 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -92,9 +92,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 1045ee20dc734..bb60f6d0c94cd 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -8,9 +8,9 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -87,9 +87,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -218,9 +218,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -232,9 +232,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -292,9 +292,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i8 %a, %b
@@ -307,9 +307,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp uge i16 %a, %b
@@ -372,11 +372,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 ; CHECK-LABEL: vector_legalized:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w8, w8, mi
+; CHECK-NEXT:    addp d0, v0.2d
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    add x0, x9, x8
 ; CHECK-NEXT:    ret
@@ -397,9 +397,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_select_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1, uxtb
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i8 %a, %b
@@ -412,9 +412,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_select_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1, uxth
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    subs w8, w9, w8
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ule i16 %a, %b

>From 73365468b38bd9dc785eb363d4d976e9fffeb7db Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Jul 2025 08:48:23 -0700
Subject: [PATCH 2/6] Combine subs(sub(a,b), 0) -> subs(a,b) in
 performCSELCombine.

---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 39 ++++++++++++-------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1b98df3d802de..4e2efe7110864 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7118,21 +7118,12 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
 
   SDLoc DL(Op);
-  SDValue Val = Op.getOperand(0);
-  SDValue Neg = DAG.getNegative(Val, DL, VT);
-  SDValue Cmp;
-
-  // For abs(sub(lhs, rhs)), we can compare lhs and rhs directly. This allows
-  // reusing the subs operation for the calculation and comparison.
-  if (Val.getOpcode() == ISD::SUB)
-    Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
-                      Val.getOperand(0), Val.getOperand(1));
-  else
-    // Otherwise, compare with zero.
-    Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Val,
-                      DAG.getConstant(0, DL, VT));
-
-  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Val, Neg,
+  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                            Op.getOperand(0));
+  // Generate SUBS & CSEL.
+  SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+                            Op.getOperand(0), DAG.getConstant(0, DL, VT));
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
                      DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
                      Cmp.getValue(1));
 }
@@ -25479,6 +25470,24 @@ static SDValue performCSELCombine(SDNode *N,
     }
   }
 
+  // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
+  // use overflow flags to avoid the comparison with zero.
+  if (Cond.getOpcode() == AArch64ISD::SUBS &&
+      isNullConstant(Cond.getOperand(1))) {
+    SDValue Sub = Cond.getOperand(0);
+    AArch64CC::CondCode CC =
+        static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
+    if (Sub.getOpcode() == ISD::SUB &&
+        (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
+         CC == AArch64CC::PL)) {
+      SDLoc DL(N);
+      SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
+                                 Sub.getOperand(0), Sub.getOperand(1));
+      return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
+                         N->getOperand(1), N->getOperand(2), Subs.getValue(1));
+    }
+  }
+
   // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z
   if (SDValue CondLast = foldCSELofLASTB(N, DAG))
     return CondLast;

>From e8efeabf751dd2912fb14b14f667ed12ee1f8ca9 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Jul 2025 10:16:07 -0700
Subject: [PATCH 3/6] Update tests after rebase.

---
 llvm/test/CodeGen/AArch64/abds-neg.ll | 24 ++++++++++++------------
 llvm/test/CodeGen/AArch64/abdu-neg.ll | 24 ++++++++++++------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 045edeb8aaac9..3c111e1dd5272 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -77,8 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    subs w8, w1, w8
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w8, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
   %bext = sext i32 %b to i64
@@ -109,8 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -125,8 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    subs w8, w8, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i16 %b to i64
@@ -140,8 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -155,8 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, ge
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
@@ -170,8 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, ge
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 8f79a46ba7801..1592009d9519a 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -77,8 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    subs w8, w1, w8
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w8, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
   %bext = zext i32 %b to i64
@@ -109,8 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -125,8 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    subs w8, w8, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i16 %b to i64
@@ -140,8 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -155,8 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128
@@ -170,8 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128

>From cba370bf5e46e712b2d898a429138c561f6a2093 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Jul 2025 10:19:14 -0700
Subject: [PATCH 4/6] Simplify performNegCSelCombine changes.

Instead of reversing the operands when they are negations of each other,
peek through already existing negations. This has the same effect when
the operands are negations of each other but should be more generally
useful.
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4e2efe7110864..c5a4276306641 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20822,6 +20822,9 @@ static bool isNegatedInteger(SDValue Op) {
 }
 
 static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
+  if (isNegatedInteger(Op))
+    return Op.getOperand(1);
+
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
   SDValue Zero = DAG.getConstant(0, DL, VT);
@@ -20851,17 +20854,11 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
   if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
     return SDValue();
 
-  SDLoc DL(N);
-  EVT VT = CSel.getValueType();
-
-  // If the operands are negations of each other, reverse them.
-  if ((isNegatedInteger(N0) && N0.getOperand(1) == N1) ||
-      (isNegatedInteger(N1) && N1.getOperand(1) == N0))
-    return DAG.getNode(AArch64ISD::CSEL, DL, VT, N1, N0, CSel.getOperand(2),
-                       CSel.getOperand(3));
-
   SDValue N0N = getNegatedInteger(N0, DAG);
   SDValue N1N = getNegatedInteger(N1, DAG);
+
+  SDLoc DL(N);
+  EVT VT = CSel.getValueType();
   return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
                      CSel.getOperand(3));
 }

>From 78619a52ac8ef30ba9978a6651c7b7bc44475896 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 31 Jul 2025 04:50:00 -0700
Subject: [PATCH 5/6] Combine Cond and Sub with Subs.

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  13 +-
 llvm/test/CodeGen/AArch64/abds-neg.ll         |  25 ++--
 llvm/test/CodeGen/AArch64/abds.ll             |  62 ++++------
 llvm/test/CodeGen/AArch64/abdu-neg.ll         |  25 ++--
 llvm/test/CodeGen/AArch64/abdu.ll             |  60 ++++------
 .../CodeGen/AArch64/csel-subs-dag-combine.ll  | 112 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/midpoint-int.ll     |  28 ++---
 7 files changed, 202 insertions(+), 123 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c5a4276306641..1811af8c5cccc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25468,8 +25468,12 @@ static SDValue performCSELCombine(SDNode *N,
   }
 
   // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
-  // use overflow flags to avoid the comparison with zero.
-  if (Cond.getOpcode() == AArch64ISD::SUBS &&
+  // use overflow flags, to avoid the comparison with zero. In case of success,
+  // this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
+  // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
+  // nodes with their SUBS equivalent as is already done for other flag-setting
+  // operators, in which case doing the replacement here becomes redundant.
+  if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
       isNullConstant(Cond.getOperand(1))) {
     SDValue Sub = Cond.getOperand(0);
     AArch64CC::CondCode CC =
@@ -25480,8 +25484,9 @@ static SDValue performCSELCombine(SDNode *N,
       SDLoc DL(N);
       SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
                                  Sub.getOperand(0), Sub.getOperand(1));
-      return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
-                         N->getOperand(1), N->getOperand(2), Subs.getValue(1));
+      DCI.CombineTo(Sub.getNode(), Subs);
+      DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));
+      return SDValue(N, 0);
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 3c111e1dd5272..b7d1b3e323d59 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -8,9 +8,8 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -25,9 +24,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -42,9 +40,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -59,9 +56,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -92,9 +88,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index cad82f2c7114a..bf52e71ec21fe 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -8,9 +8,8 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -24,9 +23,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -40,9 +38,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i8 %a to i64
@@ -56,9 +53,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -87,9 +83,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
@@ -214,9 +209,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -228,9 +222,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -286,9 +279,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i8 %a, %b
@@ -301,9 +293,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sge i16 %a, %b
@@ -507,11 +498,10 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 ; CHECK-LABEL: vector_legalized:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
-; CHECK-NEXT:    cneg w8, w8, mi
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    cneg w8, w8, mi
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    add x0, x9, x8
 ; CHECK-NEXT:    ret
@@ -532,9 +522,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_select_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w8, w1
-; CHECK-NEXT:    sxtb w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i8 %a, %b
@@ -547,9 +536,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_select_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    sxth w9, w0
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    subs w8, w8, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp sle i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 1592009d9519a..943d02cc29bf1 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -8,9 +8,8 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -25,9 +24,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -42,9 +40,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -59,9 +56,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -92,9 +88,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, pl
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index bb60f6d0c94cd..8d2b0b0742d7d 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -8,9 +8,8 @@
 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -24,9 +23,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -40,9 +38,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i8 %a to i64
@@ -56,9 +53,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -87,9 +83,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
@@ -218,9 +213,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -232,9 +226,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_minmax_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -292,9 +285,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i8 %a, %b
@@ -307,9 +299,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp uge i16 %a, %b
@@ -372,9 +363,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 ; CHECK-LABEL: vector_legalized:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w8, w8, mi
 ; CHECK-NEXT:    addp d0, v0.2d
 ; CHECK-NEXT:    fmov x9, d0
@@ -397,9 +387,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
 define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: abd_select_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    subs w8, w8, w1, uxtb
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i8 %a, %b
@@ -412,9 +401,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
 define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_select_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:    subs w8, w9, w8
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    subs w8, w8, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %cmp = icmp ule i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll
new file mode 100644
index 0000000000000..5036be9c45e69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll
@@ -0,0 +1,112 @@
+; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+; These tests ensure that we don't combine
+;   CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y)
+; if the flags set by SUBS(SUB(x,y), 0) have more than one use.
+;
+; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is
+; only valid if there are no users of the overflow flags (C/V) generated by the
+; SUBS. Currently, we only check the flags used by the CSEL, and therefore we
+; conservatively reject cases where the SUBS's flags have other uses.
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:'
+; CHECK-NEXT: SelectionDAG has 13 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:         t5: i32 = sub t2, t4
+; CHECK-NEXT:       t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0>
+; CHECK-NEXT:     t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1
+; CHECK-NEXT:   t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16
+; CHECK-NEXT:   t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1
+
+; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:'
+; CHECK-NEXT: SelectionDAG has 11 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:       t18: i32,i32 = AArch64ISD::SUBS t2, t4
+; CHECK-NEXT:     t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1
+; CHECK-NEXT:   t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16
+; CHECK-NEXT:   t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1
+
+define i32 @combine_subs(i32 %a, i32 %b) {
+  %sub = sub i32 %a, %b
+  %cc = icmp ne i32 %sub, 0
+  %sel = select i1 %cc, i32 %a, i32 %b
+  ret i32 %sel
+}
+
+; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:'
+; CHECK-NEXT: SelectionDAG has 14 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:   t5: i32 = sub t2, t4
+; CHECK-NEXT:         t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0>
+; CHECK-NEXT:       t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1
+; CHECK-NEXT:     t10: i32 = add t17, t5
+; CHECK-NEXT:   t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10
+; CHECK-NEXT:   t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1
+
+; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:'
+; CHECK-NEXT: SelectionDAG has 12 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:       t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1
+; CHECK-NEXT:     t10: i32 = add t17, t19
+; CHECK-NEXT:   t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10
+; CHECK-NEXT:   t19: i32,i32 = AArch64ISD::SUBS t2, t4
+; CHECK-NEXT:   t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1
+
+define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) {
+  %sub = sub i32 %a, %b
+  %cc = icmp ne i32 %sub, 0
+  %sel = select i1 %cc, i32 %a, i32 %b
+  %add = add i32 %sel, %sub
+  ret i32 %add
+}
+
+; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:'
+; CHECK-NEXT: SelectionDAG has 19 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:       t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1
+; CHECK-NEXT:         t6: i32,ch = CopyFromReg t0, Register:i32 %2
+; CHECK-NEXT:         t8: i32,ch = CopyFromReg t0, Register:i32 %3
+; CHECK-NEXT:       t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1
+; CHECK-NEXT:     t15: i32 = add t24, t23
+; CHECK-NEXT:   t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15
+; CHECK-NEXT:     t9: i32 = sub t2, t4
+; CHECK-NEXT:   t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0>
+; CHECK-NEXT:   t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1
+
+; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:'
+; CHECK-NEXT: SelectionDAG has 19 nodes:
+; CHECK-NEXT:   t0: ch,glue = EntryToken
+; CHECK-NEXT:   t2: i32,ch = CopyFromReg t0, Register:i32 %0
+; CHECK-NEXT:   t4: i32,ch = CopyFromReg t0, Register:i32 %1
+; CHECK-NEXT:       t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1
+; CHECK-NEXT:         t6: i32,ch = CopyFromReg t0, Register:i32 %2
+; CHECK-NEXT:         t8: i32,ch = CopyFromReg t0, Register:i32 %3
+; CHECK-NEXT:       t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1
+; CHECK-NEXT:     t15: i32 = add t24, t23
+; CHECK-NEXT:   t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15
+; CHECK-NEXT:     t9: i32 = sub t2, t4
+; CHECK-NEXT:   t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0>
+; CHECK-NEXT:   t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1
+
+define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) {
+  %sub = sub i32 %a, %b
+  %cc = icmp ne i32 %sub, 0
+  %sel = select i1 %cc, i32 %a, i32 %b
+  %other = select i1 %cc, i32 %c, i32 %d
+  %add = add i32 %sel, %other
+  ret i32 %add
+}
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 15c1dffae749e..d881e998db2bc 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -255,12 +255,11 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
 define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
 ; CHECK-LABEL: scalar_i16_signed_reg_reg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w9, w1
-; CHECK-NEXT:    sxth w10, w0
+; CHECK-NEXT:    sxth w9, w0
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    subs w9, w10, w9
-; CHECK-NEXT:    cneg w9, w9, mi
+; CHECK-NEXT:    subs w9, w9, w1, sxth
 ; CHECK-NEXT:    cneg w8, w8, le
+; CHECK-NEXT:    cneg w9, w9, mi
 ; CHECK-NEXT:    lsr w9, w9, #1
 ; CHECK-NEXT:    madd w0, w9, w8, w0
 ; CHECK-NEXT:    ret
@@ -278,12 +277,11 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
 define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
 ; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w9, w1, #0xffff
-; CHECK-NEXT:    and w10, w0, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    subs w9, w10, w9
-; CHECK-NEXT:    cneg w9, w9, mi
+; CHECK-NEXT:    subs w9, w9, w1, uxth
 ; CHECK-NEXT:    cneg w8, w8, ls
+; CHECK-NEXT:    cneg w9, w9, mi
 ; CHECK-NEXT:    lsr w9, w9, #1
 ; CHECK-NEXT:    madd w0, w9, w8, w0
 ; CHECK-NEXT:    ret
@@ -382,12 +380,11 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
 define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
 ; CHECK-LABEL: scalar_i8_signed_reg_reg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxtb w9, w1
-; CHECK-NEXT:    sxtb w10, w0
+; CHECK-NEXT:    sxtb w9, w0
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    subs w9, w10, w9
-; CHECK-NEXT:    cneg w9, w9, mi
+; CHECK-NEXT:    subs w9, w9, w1, sxtb
 ; CHECK-NEXT:    cneg w8, w8, le
+; CHECK-NEXT:    cneg w9, w9, mi
 ; CHECK-NEXT:    lsr w9, w9, #1
 ; CHECK-NEXT:    madd w0, w9, w8, w0
 ; CHECK-NEXT:    ret
@@ -405,12 +402,11 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
 define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
 ; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w9, w1, #0xff
-; CHECK-NEXT:    and w10, w0, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    subs w9, w10, w9
-; CHECK-NEXT:    cneg w9, w9, mi
+; CHECK-NEXT:    subs w9, w9, w1, uxtb
 ; CHECK-NEXT:    cneg w8, w8, ls
+; CHECK-NEXT:    cneg w9, w9, mi
 ; CHECK-NEXT:    lsr w9, w9, #1
 ; CHECK-NEXT:    madd w0, w9, w8, w0
 ; CHECK-NEXT:    ret

>From 1741900dc4fbe023b63dea579d979b93289f7da0 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 31 Jul 2025 06:32:43 -0700
Subject: [PATCH 6/6] Revert performNegCSelCombine-related changes.

By combining the original SUB node to the SUBS created when folding
subs(sub(a,b), 0) -> subs(a,b), we no longer create ``swapped'' SUB
nodes, such as sub(b,a), after performNegCSelCombine. Therefore, the
changes to avoid code gen regressions in some cases are no longer
required.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  3 ---
 llvm/test/CodeGen/AArch64/abds-neg.ll         | 24 +++++++++----------
 llvm/test/CodeGen/AArch64/abdu-neg.ll         | 24 +++++++++----------
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1811af8c5cccc..c790df26bc8f9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20822,9 +20822,6 @@ static bool isNegatedInteger(SDValue Op) {
 }
 
 static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
-  if (isNegatedInteger(Op))
-    return Op.getOperand(1);
-
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
   SDValue Zero = DAG.getConstant(0, DL, VT);
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index b7d1b3e323d59..02c76ba7343a0 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    subs w8, w8, w1
-; CHECK-NEXT:    cneg w0, w8, gt
+; CHECK-NEXT:    subs w8, w1, w8
+; CHECK-NEXT:    cneg w0, w8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
   %bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, gt
+; CHECK-NEXT:    subs w8, w1, w0
+; CHECK-NEXT:    cneg w0, w8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -120,8 +120,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    subs w8, w0, w8
-; CHECK-NEXT:    cneg w0, w8, gt
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    cneg w0, w8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i16 %b to i64
@@ -135,8 +135,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, gt
+; CHECK-NEXT:    subs w8, w1, w0
+; CHECK-NEXT:    cneg w0, w8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -150,8 +150,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, gt
+; CHECK-NEXT:    subs x8, x1, x0
+; CHECK-NEXT:    cneg x0, x8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
@@ -165,8 +165,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, gt
+; CHECK-NEXT:    subs x8, x1, x0
+; CHECK-NEXT:    cneg x0, x8, ge
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 943d02cc29bf1..400031b64cb84 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    subs w8, w8, w1
-; CHECK-NEXT:    cneg w0, w8, hi
+; CHECK-NEXT:    subs w8, w1, w8
+; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
   %bext = zext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, hi
+; CHECK-NEXT:    subs w8, w1, w0
+; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -120,8 +120,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    subs w8, w0, w8
-; CHECK-NEXT:    cneg w0, w8, hi
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i16 %b to i64
@@ -135,8 +135,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, w1
-; CHECK-NEXT:    cneg w0, w8, hi
+; CHECK-NEXT:    subs w8, w1, w0
+; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -150,8 +150,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, hi
+; CHECK-NEXT:    subs x8, x1, x0
+; CHECK-NEXT:    cneg x0, x8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128
@@ -165,8 +165,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, hi
+; CHECK-NEXT:    subs x8, x1, x0
+; CHECK-NEXT:    cneg x0, x8, hs
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128



More information about the llvm-commits mailing list