[llvm] [AArch64] Combine subtract with borrow to SBC. (PR #165271)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 27 09:24:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
Specifically, this patch adds the following combines:
SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
The CSET may be preceded by a ZEXT.
Fixes #164748, but please let me know if anyone has a better suggestion.
---
Full diff: https://github.com/llvm/llvm-project/pull/165271.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+33)
- (added) llvm/test/CodeGen/AArch64/sbc.ll (+392)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..f7cdfd00d84ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Attempt to combine the following patterns:
+// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() != ISD::SUB)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+ N1 = N1.getOperand(0);
+ if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO)
+ return SDValue();
+
+ SDValue Flags = N1.getOperand(3);
+ if (Flags.getOpcode() != AArch64ISD::SUBS)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ if (N0->getOpcode() != ISD::SUB)
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+ Flags);
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), Flags);
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N,
return Val;
if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
return Val;
+ if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+ return Val;
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
new file mode 100644
index 0000000000000..fff63c1709218
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -0,0 +1,392 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_basic_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_basic_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_mixed_i32_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i32_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_mixed_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_only_borrow:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_only_borrow:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w2, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_sext_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sext_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT: add w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = sext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = add i32 %sub, %carry
+ ret i32 %res
+}
+
+; FIXME: This case could be supported with reversed operands to the CMP.
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, hi
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ugt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp slt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, gt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp sgt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_setcc_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_setcc_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i1 %cc)
+ ret i32 %res
+}
+
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_carry_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_carry_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i32 %carry)
+ ret i32 %res
+}
+
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_multiple_sub_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: sbc w19, w2, w3
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_sub_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: sub w19, w2, w3
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w0, w19
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ tail call void @use(i32 %sub)
+ ret i32 %res
+}
+
+define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
+; CHECK-SD-LABEL: test_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: cmp w8, w1, uxtb
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxtb
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i8 %a, %b
+ %carry = zext i1 %cc to i8
+ %sub = sub i8 %x, %y
+ %res = sub i8 %sub, %carry
+ ret i8 %res
+}
+
+define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
+; CHECK-SD-LABEL: test_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: cmp w8, w1, uxth
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxth
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i16 %a, %b
+ %carry = zext i1 %cc to i16
+ %sub = sub i16 %x, %y
+ %res = sub i16 %sub, %carry
+ ret i16 %res
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-SD-LABEL: test_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v4.4s, #1
+; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult <4 x i32> %a, %b
+ %carry = zext <4 x i1> %cc to <4 x i32>
+ %sub = sub <4 x i32> %x, %y
+ %res = sub <4 x i32> %sub, %carry
+ ret <4 x i32> %res
+}
+
+declare void @use()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/165271
More information about the llvm-commits mailing list