[llvm] [AArch64] Combine subtract with borrow to SBC. (PR #165271)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 7 08:13:38 PST 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/165271
>From 77ed715c5e854fe45f17790e51a3b19d032faca5 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 24 Oct 2025 07:05:46 -0700
Subject: [PATCH 1/3] Add tests.
---
llvm/test/CodeGen/AArch64/sbc.ll | 398 +++++++++++++++++++++++++++++++
1 file changed, 398 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sbc.ll
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
new file mode 100644
index 0000000000000..4af64d485cc5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_basic_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_basic_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sub x8, x2, x3
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub x0, x8, x9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_mixed_i32_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub x8, x2, x3
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub x0, x8, x9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i32_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_mixed_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-LABEL: test_only_borrow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: cset w8, lo
+; CHECK-NEXT: sub w0, w2, w8
+; CHECK-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_sext_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sext_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT: add w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = sext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = add i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, hi
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ugt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp slt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, gt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp sgt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_setcc_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_setcc_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i1 %cc)
+ ret i32 %res
+}
+
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_carry_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_carry_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i32 %carry)
+ ret i32 %res
+}
+
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_multiple_sub_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: sub w19, w8, w9
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_sub_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: sub w19, w2, w3
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w0, w19
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ tail call void @use(i32 %sub)
+ ret i32 %res
+}
+
+define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
+; CHECK-SD-LABEL: test_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w9, w0, #0xff
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w9, w1, uxtb
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxtb
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i8 %a, %b
+ %carry = zext i1 %cc to i8
+ %sub = sub i8 %x, %y
+ %res = sub i8 %sub, %carry
+ ret i8 %res
+}
+
+define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
+; CHECK-SD-LABEL: test_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w9, w0, #0xffff
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w9, w1, uxth
+; CHECK-SD-NEXT: cset w9, lo
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxth
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i16 %a, %b
+ %carry = zext i1 %cc to i16
+ %sub = sub i16 %x, %y
+ %res = sub i16 %sub, %carry
+ ret i16 %res
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-SD-LABEL: test_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v4.4s, #1
+; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult <4 x i32> %a, %b
+ %carry = zext <4 x i1> %cc to <4 x i32>
+ %sub = sub <4 x i32> %x, %y
+ %res = sub <4 x i32> %sub, %carry
+ ret <4 x i32> %res
+}
+
+declare void @use()
>From 4cbc151702b80b8ee75005890c560a1c1008f1fc Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 24 Oct 2025 10:04:36 -0700
Subject: [PATCH 2/3] [AArch64] Combine subtract with borrow to SBC.
Specifically, this patch adds the following combines:
SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
The CSET may be preceded by a ZEXT.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 33 ++++++++++
llvm/test/CodeGen/AArch64/sbc.ll | 60 +++++++++----------
2 files changed, 60 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..f7cdfd00d84ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Attempt to fold a subtraction of a borrow bit into a single SBC:
+// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT. Returns an empty SDValue on no match.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() != ISD::SUB) // Only SUB roots are matched.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64) // Scalar i32/i64 results only.
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) // Look through a single-use ZEXT.
+ N1 = N1.getOperand(0);
+ if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO) // Need a single-use CSET LO.
+ return SDValue();
+
+ SDValue Flags = N1.getOperand(3); // NOTE(review): assumes operand 3 is the flags input.
+ if (Flags.getOpcode() != AArch64ISD::SUBS) // Flags must come from a SUBS node.
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ if (N0->getOpcode() != ISD::SUB)
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+ Flags);
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), Flags);
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N,
return Val;
if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
return Val;
+ if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+ return Val;
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
index 4af64d485cc5f..fff63c1709218 100644
--- a/llvm/test/CodeGen/AArch64/sbc.ll
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -8,9 +8,7 @@ define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
; CHECK-SD-LABEL: test_basic_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp w0, w1
-; CHECK-SD-NEXT: sub w8, w2, w3
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: sbc w0, w2, w3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_basic_i32:
@@ -31,9 +29,7 @@ define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
; CHECK-SD-LABEL: test_basic_i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x0, x1
-; CHECK-SD-NEXT: sub x8, x2, x3
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub x0, x8, x9
+; CHECK-SD-NEXT: sbc x0, x2, x3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_basic_i64:
@@ -54,9 +50,7 @@ define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
; CHECK-SD-LABEL: test_mixed_i32_i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp w0, w1
-; CHECK-SD-NEXT: sub x8, x2, x3
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub x0, x8, x9
+; CHECK-SD-NEXT: sbc x0, x2, x3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_mixed_i32_i64:
@@ -77,9 +71,7 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
; CHECK-SD-LABEL: test_mixed_i64_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x0, x1
-; CHECK-SD-NEXT: sub w8, w2, w3
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: sbc w0, w2, w3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_mixed_i64_i32:
@@ -97,12 +89,18 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
}
define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
-; CHECK-LABEL: test_only_borrow:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset w8, lo
-; CHECK-NEXT: sub w0, w2, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_only_borrow:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_only_borrow:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w2, w8
+; CHECK-GI-NEXT: ret
%cc = icmp ult i32 %a, %b
%carry = zext i1 %cc to i32
%res = sub i32 %x, %carry
@@ -113,9 +111,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
; CHECK-SD-LABEL: test_sext_add:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp w0, w1
-; CHECK-SD-NEXT: sub w8, w2, w3
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: sbc w0, w2, w3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_sext_add:
@@ -133,6 +129,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
ret i32 %res
}
+; FIXME: This case could be supported with reversed operands to the CMP.
define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
; CHECK-SD-LABEL: test_ugt:
; CHECK-SD: // %bb.0:
@@ -289,9 +286,8 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: sub w8, w2, w3
; CHECK-SD-NEXT: cmp w0, w1
-; CHECK-SD-NEXT: cset w9, lo
; CHECK-SD-NEXT: mov w0, w8
-; CHECK-SD-NEXT: sub w19, w8, w9
+; CHECK-SD-NEXT: sbc w19, w2, w3
; CHECK-SD-NEXT: bl use
; CHECK-SD-NEXT: mov w0, w19
; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
@@ -325,11 +321,9 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
; CHECK-SD-LABEL: test_i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: and w9, w0, #0xff
-; CHECK-SD-NEXT: sub w8, w2, w3
-; CHECK-SD-NEXT: cmp w9, w1, uxtb
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: cmp w8, w1, uxtb
+; CHECK-SD-NEXT: sbc w0, w2, w3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_i8:
@@ -350,11 +344,9 @@ define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
; CHECK-SD-LABEL: test_i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: and w9, w0, #0xffff
-; CHECK-SD-NEXT: sub w8, w2, w3
-; CHECK-SD-NEXT: cmp w9, w1, uxth
-; CHECK-SD-NEXT: cset w9, lo
-; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: cmp w8, w1, uxth
+; CHECK-SD-NEXT: sbc w0, w2, w3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_i16:
@@ -396,3 +388,5 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32>
}
declare void @use()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 5f4e210ac3ec07a31be826f516106fd99ad22141 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 7 Nov 2025 04:40:47 -0800
Subject: [PATCH 3/3] Switch special case handling.
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f7cdfd00d84ec..0e6d5dc7effab 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22352,11 +22352,11 @@ static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
- if (N0->getOpcode() != ISD::SUB)
- return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
- Flags);
- return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
- N0.getOperand(1), Flags);
+ if (N0->getOpcode() == ISD::SUB)
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), Flags);
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+ Flags);
}
static SDValue performAddSubCombine(SDNode *N,
More information about the llvm-commits
mailing list