[llvm] [AArch64] Combine subtract with borrow to SBC. (PR #165271)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 7 08:13:38 PST 2025


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/165271

>From 77ed715c5e854fe45f17790e51a3b19d032faca5 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 24 Oct 2025 07:05:46 -0700
Subject: [PATCH 1/3] Add tests for subtract-with-borrow patterns.

---
 llvm/test/CodeGen/AArch64/sbc.ll | 398 +++++++++++++++++++++++++++++++
 1 file changed, 398 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sbc.ll

diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
new file mode 100644
index 0000000000000..4af64d485cc5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_basic_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_basic_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_basic_i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp x0, x1
+; CHECK-SD-NEXT:    sub x8, x2, x3
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub x0, x8, x9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_basic_i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp x0, x1
+; CHECK-GI-NEXT:    sub x9, x2, x3
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub x0, x9, x8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i64 %a, %b
+  %carry = zext i1 %cc to i64
+  %sub = sub i64 %x, %y
+  %res = sub i64 %sub, %carry
+  ret i64 %res
+}
+
+define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_mixed_i32_i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub x8, x2, x3
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub x0, x8, x9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_mixed_i32_i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub x9, x2, x3
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub x0, x9, x8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i64
+  %sub = sub i64 %x, %y
+  %res = sub i64 %sub, %carry
+  ret i64 %res
+}
+
+define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_mixed_i64_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp x0, x1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_mixed_i64_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp x0, x1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i64 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-LABEL: test_only_borrow:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    sub w0, w2, w8
+; CHECK-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_sext_add:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sext_add:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT:    add w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = sext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = add i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_ugt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, hi
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ugt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, hi
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ugt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_slt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, lt
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_slt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, lt
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp slt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_sgt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cset w9, gt
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_sgt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cset w8, gt
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp sgt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_setcc_uses:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    cset w0, lo
+; CHECK-SD-NEXT:    sub w19, w2, w0
+; CHECK-SD-NEXT:    bl use
+; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_multiple_setcc_uses:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    mov w19, w2
+; CHECK-GI-NEXT:    cset w20, lo
+; CHECK-GI-NEXT:    mov w0, w20
+; CHECK-GI-NEXT:    bl use
+; CHECK-GI-NEXT:    sub w0, w19, w20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  tail call void @use(i1 %cc)
+  ret i32 %res
+}
+
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_carry_uses:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    cset w0, lo
+; CHECK-SD-NEXT:    sub w19, w2, w0
+; CHECK-SD-NEXT:    bl use
+; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_multiple_carry_uses:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    mov w19, w2
+; CHECK-GI-NEXT:    cset w20, lo
+; CHECK-GI-NEXT:    mov w0, w20
+; CHECK-GI-NEXT:    bl use
+; CHECK-GI-NEXT:    sub w0, w19, w20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  tail call void @use(i32 %carry)
+  ret i32 %res
+}
+
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_multiple_sub_uses:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    mov w0, w8
+; CHECK-SD-NEXT:    sub w19, w8, w9
+; CHECK-SD-NEXT:    bl use
+; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_multiple_sub_uses:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w20, -16
+; CHECK-GI-NEXT:    .cfi_offset w30, -32
+; CHECK-GI-NEXT:    sub w19, w2, w3
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    cset w20, lo
+; CHECK-GI-NEXT:    bl use
+; CHECK-GI-NEXT:    sub w0, w19, w20
+; CHECK-GI-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  tail call void @use(i32 %sub)
+  ret i32 %res
+}
+
+define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
+; CHECK-SD-LABEL: test_i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    and w9, w0, #0xff
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cmp w9, w1, uxtb
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    and w8, w0, #0xff
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cmp w8, w1, uxtb
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i8 %a, %b
+  %carry = zext i1 %cc to i8
+  %sub = sub i8 %x, %y
+  %res = sub i8 %sub, %carry
+  ret i8 %res
+}
+
+define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
+; CHECK-SD-LABEL: test_i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    and w9, w0, #0xffff
+; CHECK-SD-NEXT:    sub w8, w2, w3
+; CHECK-SD-NEXT:    cmp w9, w1, uxth
+; CHECK-SD-NEXT:    cset w9, lo
+; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    and w8, w0, #0xffff
+; CHECK-GI-NEXT:    sub w9, w2, w3
+; CHECK-GI-NEXT:    cmp w8, w1, uxth
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub w0, w9, w8
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult i16 %a, %b
+  %carry = zext i1 %cc to i16
+  %sub = sub i16 %x, %y
+  %res = sub i16 %sub, %carry
+  ret i16 %res
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-SD-LABEL: test_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sub v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    add v0.4s, v2.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v4.4s, #1
+; CHECK-GI-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    sub v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    ret
+  %cc = icmp ult <4 x i32> %a, %b
+  %carry = zext <4 x i1> %cc to <4 x i32>
+  %sub = sub <4 x i32> %x, %y
+  %res = sub <4 x i32> %sub, %carry
+  ret <4 x i32> %res
+}
+
+declare void @use()

>From 4cbc151702b80b8ee75005890c560a1c1008f1fc Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 24 Oct 2025 10:04:36 -0700
Subject: [PATCH 2/3] [AArch64] Combine subtract with borrow to SBC.

Specifically, this patch adds the following combines:
  SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
  SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)

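The CSET may be wrapped in a ZEXT, i.e. the SUB operand may be
(ZEXT (CSET ...)). The CSET, and the ZEXT when present, must have a single use.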
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 33 ++++++++++
 llvm/test/CodeGen/AArch64/sbc.ll              | 60 +++++++++----------
 2 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..f7cdfd00d84ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
 }
 
+// Attempt to combine the following patterns:
+//   SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+//   SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::SUB)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+    N1 = N1.getOperand(0);
+  if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO)
+    return SDValue();
+
+  SDValue Flags = N1.getOperand(3);
+  if (Flags.getOpcode() != AArch64ISD::SUBS)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  if (N0->getOpcode() != ISD::SUB)
+    return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+                       Flags);
+  return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+                     N0.getOperand(1), Flags);
+}
+
 static SDValue performAddSubCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
   // Try to change sum of two reductions.
@@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N,
     return Val;
   if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
     return Val;
+  if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+    return Val;
 
   if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
     return Val;
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
index 4af64d485cc5f..fff63c1709218 100644
--- a/llvm/test/CodeGen/AArch64/sbc.ll
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -8,9 +8,7 @@ define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
 ; CHECK-SD-LABEL: test_basic_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    cmp w0, w1
-; CHECK-SD-NEXT:    sub w8, w2, w3
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    sbc w0, w2, w3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_basic_i32:
@@ -31,9 +29,7 @@ define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
 ; CHECK-SD-LABEL: test_basic_i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    cmp x0, x1
-; CHECK-SD-NEXT:    sub x8, x2, x3
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub x0, x8, x9
+; CHECK-SD-NEXT:    sbc x0, x2, x3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_basic_i64:
@@ -54,9 +50,7 @@ define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
 ; CHECK-SD-LABEL: test_mixed_i32_i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    cmp w0, w1
-; CHECK-SD-NEXT:    sub x8, x2, x3
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub x0, x8, x9
+; CHECK-SD-NEXT:    sbc x0, x2, x3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_mixed_i32_i64:
@@ -77,9 +71,7 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
 ; CHECK-SD-LABEL: test_mixed_i64_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    cmp x0, x1
-; CHECK-SD-NEXT:    sub w8, w2, w3
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    sbc w0, w2, w3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_mixed_i64_i32:
@@ -97,12 +89,18 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
 }
 
 define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
-; CHECK-LABEL: test_only_borrow:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    sub w0, w2, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_only_borrow:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, w1
+; CHECK-SD-NEXT:    sbc w0, w2, wzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_only_borrow:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmp w0, w1
+; CHECK-GI-NEXT:    cset w8, lo
+; CHECK-GI-NEXT:    sub w0, w2, w8
+; CHECK-GI-NEXT:    ret
   %cc = icmp ult i32 %a, %b
   %carry = zext i1 %cc to i32
   %res = sub i32 %x, %carry
@@ -113,9 +111,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
 ; CHECK-SD-LABEL: test_sext_add:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    cmp w0, w1
-; CHECK-SD-NEXT:    sub w8, w2, w3
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    sbc w0, w2, w3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_sext_add:
@@ -133,6 +129,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
   ret i32 %res
 }
 
+; FIXME: This case could be supported with reversed operands to the CMP.
 define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
 ; CHECK-SD-LABEL: test_ugt:
 ; CHECK-SD:       // %bb.0:
@@ -289,9 +286,8 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    sub w8, w2, w3
 ; CHECK-SD-NEXT:    cmp w0, w1
-; CHECK-SD-NEXT:    cset w9, lo
 ; CHECK-SD-NEXT:    mov w0, w8
-; CHECK-SD-NEXT:    sub w19, w8, w9
+; CHECK-SD-NEXT:    sbc w19, w2, w3
 ; CHECK-SD-NEXT:    bl use
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
@@ -325,11 +321,9 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
 define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
 ; CHECK-SD-LABEL: test_i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    and w9, w0, #0xff
-; CHECK-SD-NEXT:    sub w8, w2, w3
-; CHECK-SD-NEXT:    cmp w9, w1, uxtb
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    and w8, w0, #0xff
+; CHECK-SD-NEXT:    cmp w8, w1, uxtb
+; CHECK-SD-NEXT:    sbc w0, w2, w3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_i8:
@@ -350,11 +344,9 @@ define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
 define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
 ; CHECK-SD-LABEL: test_i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    and w9, w0, #0xffff
-; CHECK-SD-NEXT:    sub w8, w2, w3
-; CHECK-SD-NEXT:    cmp w9, w1, uxth
-; CHECK-SD-NEXT:    cset w9, lo
-; CHECK-SD-NEXT:    sub w0, w8, w9
+; CHECK-SD-NEXT:    and w8, w0, #0xffff
+; CHECK-SD-NEXT:    cmp w8, w1, uxth
+; CHECK-SD-NEXT:    sbc w0, w2, w3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_i16:
@@ -396,3 +388,5 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32>
 }
 
 declare void @use()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 5f4e210ac3ec07a31be826f516106fd99ad22141 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 7 Nov 2025 04:40:47 -0800
Subject: [PATCH 3/3] Check the nested SUB case first; fall through to the borrow-only case.

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f7cdfd00d84ec..0e6d5dc7effab 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22352,11 +22352,11 @@ static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
 
   SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
-  if (N0->getOpcode() != ISD::SUB)
-    return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
-                       Flags);
-  return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
-                     N0.getOperand(1), Flags);
+  if (N0->getOpcode() == ISD::SUB)
+    return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+                       N0.getOperand(1), Flags);
+  return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+                     Flags);
 }
 
 static SDValue performAddSubCombine(SDNode *N,



More information about the llvm-commits mailing list