[llvm] [PPC] Combine sub with carry to SUBE (PR #185671)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 08:34:43 PDT 2026


https://github.com/SiliconA-Z created https://github.com/llvm/llvm-project/pull/185671

None

>From 3374087dd1caa49a698f503ae2a8209ba17885a9 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 10 Mar 2026 10:39:01 -0400
Subject: [PATCH 1/2] Create subfe-combine.ll

---
 llvm/test/CodeGen/PowerPC/subfe-combine.ll | 446 +++++++++++++++++++++
 1 file changed, 446 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/subfe-combine.ll

diff --git a/llvm/test/CodeGen/PowerPC/subfe-combine.ll b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
new file mode 100644
index 0000000000000..40d1f1fe7ad73
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck --check-prefix=CHECK-PPC64LE %s
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr7 | FileCheck --check-prefix=CHECK-PPC32 %s
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_basic_i32:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_basic_i32:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 4, 5, 6
+; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_only_borrow:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 5, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_only_borrow:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 3, 5, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_sext_add:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_sext_add:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    sub 3, 5, 6
+; CHECK-PPC32-NEXT:    li 4, 0
+; CHECK-PPC32-NEXT:    li 5, 1
+; CHECK-PPC32-NEXT:    isellt 4, 5, 4
+; CHECK-PPC32-NEXT:    sub 3, 3, 4
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = sext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = add i32 %sub, %carry
+  ret i32 %res
+}
+
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_ugt:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_ugt:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    iselgt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 4, 5, 6
+; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ugt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-PPC64LE-LABEL: test_basic_i64:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    subc 4, 3, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    subfe 3, 3, 3
+; CHECK-PPC64LE-NEXT:    neg 3, 3
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_basic_i64:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 5
+; CHECK-PPC32-NEXT:    cmplw 1, 4, 6
+; CHECK-PPC32-NEXT:    li 3, 1
+; CHECK-PPC32-NEXT:    subc 4, 8, 10
+; CHECK-PPC32-NEXT:    crandc 20, 0, 2
+; CHECK-PPC32-NEXT:    crand 21, 2, 4
+; CHECK-PPC32-NEXT:    li 5, 0
+; CHECK-PPC32-NEXT:    subfe 6, 9, 7
+; CHECK-PPC32-NEXT:    crnor 20, 21, 20
+; CHECK-PPC32-NEXT:    isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT:    subc 4, 4, 3
+; CHECK-PPC32-NEXT:    subfe 3, 5, 6
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i64 %a, %b
+  %carry = zext i1 %cc to i64
+  %sub = sub i64 %x, %y
+  %res = sub i64 %sub, %carry
+  ret i64 %res
+}
+
+define i64 @test_only_borrow_i64(i64 %a, i64 %b, i64 %x) {
+; CHECK-PPC64LE-LABEL: test_only_borrow_i64:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    subc 4, 3, 4
+; CHECK-PPC64LE-NEXT:    subfe 3, 3, 3
+; CHECK-PPC64LE-NEXT:    neg 3, 3
+; CHECK-PPC64LE-NEXT:    sub 3, 5, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_only_borrow_i64:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 5
+; CHECK-PPC32-NEXT:    cmplw 1, 4, 6
+; CHECK-PPC32-NEXT:    li 3, 1
+; CHECK-PPC32-NEXT:    li 5, 0
+; CHECK-PPC32-NEXT:    crandc 20, 0, 2
+; CHECK-PPC32-NEXT:    crand 21, 2, 4
+; CHECK-PPC32-NEXT:    crnor 20, 21, 20
+; CHECK-PPC32-NEXT:    isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT:    subc 4, 8, 3
+; CHECK-PPC32-NEXT:    subfe 3, 5, 7
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i64 %a, %b
+  %carry = zext i1 %cc to i64
+  %res = sub i64 %x, %carry
+  ret i64 %res
+}
+
+define i64 @test_ugt_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-PPC64LE-LABEL: test_ugt_i64:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    subc 3, 4, 3
+; CHECK-PPC64LE-NEXT:    subfe 3, 4, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    neg 3, 3
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_ugt_i64:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 5
+; CHECK-PPC32-NEXT:    cmplw 1, 4, 6
+; CHECK-PPC32-NEXT:    li 3, 1
+; CHECK-PPC32-NEXT:    subc 4, 8, 10
+; CHECK-PPC32-NEXT:    crandc 20, 1, 2
+; CHECK-PPC32-NEXT:    crand 21, 2, 5
+; CHECK-PPC32-NEXT:    li 5, 0
+; CHECK-PPC32-NEXT:    subfe 6, 9, 7
+; CHECK-PPC32-NEXT:    crnor 20, 21, 20
+; CHECK-PPC32-NEXT:    isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT:    subc 4, 4, 3
+; CHECK-PPC32-NEXT:    subfe 3, 5, 6
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ugt i64 %a, %b
+  %carry = zext i1 %cc to i64
+  %sub = sub i64 %x, %y
+  %res = sub i64 %sub, %carry
+  ret i64 %res
+}
+
+; Negative test: signed comparison should not be combined
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_unsupported_cc_slt:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    extsw 3, 3
+; CHECK-PPC64LE-NEXT:    extsw 4, 4
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_unsupported_cc_slt:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmpw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 4, 5, 6
+; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp slt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+; Negative test: signed comparison should not be combined
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_unsupported_cc_sgt:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    extsw 4, 4
+; CHECK-PPC64LE-NEXT:    extsw 3, 3
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_unsupported_cc_sgt:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmpw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    iselgt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 4, 5, 6
+; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp sgt i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  ret i32 %res
+}
+
+; Negative test: multiple uses of setcc prevents combine
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_multiple_setcc_uses:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    mflr 0
+; CHECK-PPC64LE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT:    .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT:    .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT:    stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    std 0, 64(1)
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 30, 5, 3
+; CHECK-PPC64LE-NEXT:    bl use
+; CHECK-PPC64LE-NEXT:    nop
+; CHECK-PPC64LE-NEXT:    mr 3, 30
+; CHECK-PPC64LE-NEXT:    addi 1, 1, 48
+; CHECK-PPC64LE-NEXT:    ld 0, 16(1)
+; CHECK-PPC64LE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT:    mtlr 0
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_multiple_setcc_uses:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    mflr 0
+; CHECK-PPC32-NEXT:    stwu 1, -16(1)
+; CHECK-PPC32-NEXT:    stw 0, 20(1)
+; CHECK-PPC32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT:    .cfi_offset lr, 4
+; CHECK-PPC32-NEXT:    .cfi_offset r30, -8
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 30, 5, 3
+; CHECK-PPC32-NEXT:    bl use
+; CHECK-PPC32-NEXT:    mr 3, 30
+; CHECK-PPC32-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT:    lwz 0, 20(1)
+; CHECK-PPC32-NEXT:    addi 1, 1, 16
+; CHECK-PPC32-NEXT:    mtlr 0
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  tail call void @use(i1 %cc)
+  ret i32 %res
+}
+
+; Negative test: multiple uses of carry prevents combine
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_multiple_carry_uses:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    mflr 0
+; CHECK-PPC64LE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT:    .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT:    .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT:    stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    std 0, 64(1)
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 30, 5, 3
+; CHECK-PPC64LE-NEXT:    bl use
+; CHECK-PPC64LE-NEXT:    nop
+; CHECK-PPC64LE-NEXT:    mr 3, 30
+; CHECK-PPC64LE-NEXT:    addi 1, 1, 48
+; CHECK-PPC64LE-NEXT:    ld 0, 16(1)
+; CHECK-PPC64LE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT:    mtlr 0
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_multiple_carry_uses:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    mflr 0
+; CHECK-PPC32-NEXT:    stwu 1, -16(1)
+; CHECK-PPC32-NEXT:    stw 0, 20(1)
+; CHECK-PPC32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT:    .cfi_offset lr, 4
+; CHECK-PPC32-NEXT:    .cfi_offset r30, -8
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    sub 30, 5, 3
+; CHECK-PPC32-NEXT:    bl use
+; CHECK-PPC32-NEXT:    mr 3, 30
+; CHECK-PPC32-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT:    lwz 0, 20(1)
+; CHECK-PPC32-NEXT:    addi 1, 1, 16
+; CHECK-PPC32-NEXT:    mtlr 0
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 %x, %carry
+  tail call void @use(i32 %carry)
+  ret i32 %res
+}
+
+; The inner sub has multiple uses; we can still combine
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_multiple_sub_uses:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    mflr 0
+; CHECK-PPC64LE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT:    .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT:    .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT:    stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    std 0, 64(1)
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    sub 30, 4, 3
+; CHECK-PPC64LE-NEXT:    clrldi 3, 4, 32
+; CHECK-PPC64LE-NEXT:    bl use
+; CHECK-PPC64LE-NEXT:    nop
+; CHECK-PPC64LE-NEXT:    mr 3, 30
+; CHECK-PPC64LE-NEXT:    addi 1, 1, 48
+; CHECK-PPC64LE-NEXT:    ld 0, 16(1)
+; CHECK-PPC64LE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT:    mtlr 0
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_multiple_sub_uses:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    mflr 0
+; CHECK-PPC32-NEXT:    stwu 1, -16(1)
+; CHECK-PPC32-NEXT:    stw 0, 20(1)
+; CHECK-PPC32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT:    .cfi_offset lr, 4
+; CHECK-PPC32-NEXT:    .cfi_offset r30, -8
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT:    isellt 4, 4, 3
+; CHECK-PPC32-NEXT:    sub 3, 5, 6
+; CHECK-PPC32-NEXT:    sub 30, 3, 4
+; CHECK-PPC32-NEXT:    bl use
+; CHECK-PPC32-NEXT:    mr 3, 30
+; CHECK-PPC32-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT:    lwz 0, 20(1)
+; CHECK-PPC32-NEXT:    addi 1, 1, 16
+; CHECK-PPC32-NEXT:    mtlr 0
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %sub = sub i32 %x, %y
+  %res = sub i32 %sub, %carry
+  tail call void @use(i32 %sub)
+  ret i32 %res
+}
+
+; Negative test: sub from zero should not combine (result would be negative carry)
+define i32 @test_sub_from_zero(i32 %a, i32 %b) {
+; CHECK-PPC64LE-LABEL: test_sub_from_zero:
+; CHECK-PPC64LE:       # %bb.0:
+; CHECK-PPC64LE-NEXT:    clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT:    clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT:    sub 3, 3, 4
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT:    neg 3, 3
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC32-LABEL: test_sub_from_zero:
+; CHECK-PPC32:       # %bb.0:
+; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    li 3, 0
+; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    isellt 3, 4, 3
+; CHECK-PPC32-NEXT:    neg 3, 3
+; CHECK-PPC32-NEXT:    blr
+  %cc = icmp ult i32 %a, %b
+  %carry = zext i1 %cc to i32
+  %res = sub i32 0, %carry
+  ret i32 %res
+}
+
+declare void @use(...)

>From 2a6b25b02404916d04d651358f4e0fc3d14cc166 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 10 Mar 2026 11:06:58 -0400
Subject: [PATCH 2/2] [PPC] Combine sub with carry to SUBE

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 73 ++++++++++++++++++++-
 llvm/test/CodeGen/PowerPC/subfe-combine.ll  | 58 +++++-----------
 2 files changed, 90 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 479c362832234..b5ec84ceae186 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1436,7 +1436,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
-  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
-                       ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
+  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SUB, ISD::XOR, ISD::SHL,
+                       ISD::SRA, ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
                        ISD::BUILD_VECTOR});
   if (Subtarget.hasFPCVT())
@@ -17233,6 +17233,77 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
   return true;
 }
 
+// Attempt to combine the following patterns:
+//   SUB x, (ZEXT (SETCC a, b, ult)) -> SUBE x, 0, (SUBC a, b)
+//   SUB (SUB x, y), (ZEXT (SETCC a, b, ult)) -> SUBE x, y, (SUBC a, b)
+//   SUB x, (ZEXT (SETCC a, b, ugt)) -> SUBE x, 0, (SUBC b, a)
+//   SUB (SUB x, y), (ZEXT (SETCC a, b, ugt)) -> SUBE x, y, (SUBC b, a)
+// PPC carry (CA) is inverted: SUBFC sets CA=1 when there is NO borrow.
+// SUBFE computes ~RA + RB + CA. When CA=0 (borrow), this gives RB-RA-1,
+// which is exactly "subtract with borrow".
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG,
+                                           const PPCSubtarget &Subtarget) {
+  if (N->getOpcode() != ISD::SUB)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != (Subtarget.isPPC64() ? MVT::i64 : MVT::i32))
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Look through ZERO_EXTEND.
+  if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+    N1 = N1.getOperand(0);
+
+  if (!N1.hasOneUse())
+    return SDValue();
+
+  // Only match pre-legalization ISD::SETCC with unsigned conditions.
+  if (N1.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+  SDValue LHS = N1.getOperand(0);
+  SDValue RHS = N1.getOperand(1);
+  // The compared values become the SUBC operands, so they must already have
+  // the subtraction's value type; bail out on narrower SETCC operands (e.g.
+  // a sub i64 of a zext'd i32 compare) to avoid building a malformed node.
+  if (LHS.getValueType() != VT)
+    return SDValue();
+  if (CC == ISD::SETUGT) {
+    // ugt a, b -> ult b, a (swap operands for SUBC)
+    std::swap(LHS, RHS);
+  } else if (CC != ISD::SETULT) {
+    return SDValue();
+  }
+
+  // Don't combine SUB 0, (ZEXT SETCC) — that's just negating the borrow.
+  if (isNullConstant(N0))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  SDValue N0LHS = N0;
+  SDValue N0RHS = DAG.getConstant(0, DL, VT);
+  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
+    N0LHS = N0.getOperand(0);
+    N0RHS = N0.getOperand(1);
+  }
+
+  // Generate SUBC to set carry: CA = (LHS >= RHS), i.e. CA=0 when LHS < RHS.
+  SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32);
+  SDValue Borrow = DAG.getNode(PPCISD::SUBC, DL, CmpVTs, LHS, RHS);
+  SDValue CarryFlag = Borrow.getValue(1);
+
+  // Generate SUBE: result = ~Y + X + CA = X - Y - !CA.
+  // When CA=0 (borrow), this is X - Y - 1, which is the desired borrow.
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+  SDValue SubE = DAG.getNode(PPCISD::SUBE, DL, VTs, N0LHS, N0RHS, CarryFlag);
+
+  return SubE.getValue(0);
+}
+
 static SDValue DAGCombineAddc(SDNode *N,
                               llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
   if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
@@ -17414,6 +17481,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::ADD:
     return combineADD(N, DCI);
+  case ISD::SUB:
+    if (SDValue Val = performSubWithBorrowCombine(N, DAG, Subtarget))
+      return Val;
+    break;
   case ISD::AND: {
     // We don't want (and (zext (shift...)), C) if C fits in the width of the
     // original input as that will prevent us from selecting optimal rotates.
diff --git a/llvm/test/CodeGen/PowerPC/subfe-combine.ll b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
index 40d1f1fe7ad73..5337ed17eff9c 100644
--- a/llvm/test/CodeGen/PowerPC/subfe-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
@@ -15,12 +15,8 @@ define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
 ;
 ; CHECK-PPC32-LABEL: test_basic_i32:
 ; CHECK-PPC32:       # %bb.0:
-; CHECK-PPC32-NEXT:    cmplw 3, 4
-; CHECK-PPC32-NEXT:    li 3, 0
-; CHECK-PPC32-NEXT:    li 4, 1
-; CHECK-PPC32-NEXT:    isellt 3, 4, 3
-; CHECK-PPC32-NEXT:    sub 4, 5, 6
-; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    subc 3, 3, 4
+; CHECK-PPC32-NEXT:    subfe 3, 6, 5
 ; CHECK-PPC32-NEXT:    blr
   %cc = icmp ult i32 %a, %b
   %carry = zext i1 %cc to i32
@@ -41,11 +37,9 @@ define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
 ;
 ; CHECK-PPC32-LABEL: test_only_borrow:
 ; CHECK-PPC32:       # %bb.0:
-; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    subc 3, 3, 4
 ; CHECK-PPC32-NEXT:    li 3, 0
-; CHECK-PPC32-NEXT:    li 4, 1
-; CHECK-PPC32-NEXT:    isellt 3, 4, 3
-; CHECK-PPC32-NEXT:    sub 3, 5, 3
+; CHECK-PPC32-NEXT:    subfe 3, 3, 5
 ; CHECK-PPC32-NEXT:    blr
   %cc = icmp ult i32 %a, %b
   %carry = zext i1 %cc to i32
@@ -66,12 +60,8 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
 ;
 ; CHECK-PPC32-LABEL: test_sext_add:
 ; CHECK-PPC32:       # %bb.0:
-; CHECK-PPC32-NEXT:    cmplw 3, 4
-; CHECK-PPC32-NEXT:    sub 3, 5, 6
-; CHECK-PPC32-NEXT:    li 4, 0
-; CHECK-PPC32-NEXT:    li 5, 1
-; CHECK-PPC32-NEXT:    isellt 4, 5, 4
-; CHECK-PPC32-NEXT:    sub 3, 3, 4
+; CHECK-PPC32-NEXT:    subc 3, 3, 4
+; CHECK-PPC32-NEXT:    subfe 3, 6, 5
 ; CHECK-PPC32-NEXT:    blr
   %cc = icmp ult i32 %a, %b
   %carry = sext i1 %cc to i32
@@ -93,12 +83,8 @@ define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
 ;
 ; CHECK-PPC32-LABEL: test_ugt:
 ; CHECK-PPC32:       # %bb.0:
-; CHECK-PPC32-NEXT:    cmplw 3, 4
-; CHECK-PPC32-NEXT:    li 3, 0
-; CHECK-PPC32-NEXT:    li 4, 1
-; CHECK-PPC32-NEXT:    iselgt 3, 4, 3
-; CHECK-PPC32-NEXT:    sub 4, 5, 6
-; CHECK-PPC32-NEXT:    sub 3, 4, 3
+; CHECK-PPC32-NEXT:    subc 3, 4, 3
+; CHECK-PPC32-NEXT:    subfe 3, 6, 5
 ; CHECK-PPC32-NEXT:    blr
   %cc = icmp ugt i32 %a, %b
   %carry = zext i1 %cc to i32
@@ -110,11 +96,8 @@ define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
 define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
 ; CHECK-PPC64LE-LABEL: test_basic_i64:
 ; CHECK-PPC64LE:       # %bb.0:
-; CHECK-PPC64LE-NEXT:    subc 4, 3, 4
-; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
-; CHECK-PPC64LE-NEXT:    subfe 3, 3, 3
-; CHECK-PPC64LE-NEXT:    neg 3, 3
-; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    subc 3, 3, 4
+; CHECK-PPC64LE-NEXT:    subfe 3, 6, 5
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC32-LABEL: test_basic_i64:
@@ -142,10 +125,9 @@ define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
 define i64 @test_only_borrow_i64(i64 %a, i64 %b, i64 %x) {
 ; CHECK-PPC64LE-LABEL: test_only_borrow_i64:
 ; CHECK-PPC64LE:       # %bb.0:
-; CHECK-PPC64LE-NEXT:    subc 4, 3, 4
-; CHECK-PPC64LE-NEXT:    subfe 3, 3, 3
-; CHECK-PPC64LE-NEXT:    neg 3, 3
-; CHECK-PPC64LE-NEXT:    sub 3, 5, 3
+; CHECK-PPC64LE-NEXT:    subc 3, 3, 4
+; CHECK-PPC64LE-NEXT:    li 3, 0
+; CHECK-PPC64LE-NEXT:    subfe 3, 3, 5
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC32-LABEL: test_only_borrow_i64:
@@ -171,10 +153,7 @@ define i64 @test_ugt_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
 ; CHECK-PPC64LE-LABEL: test_ugt_i64:
 ; CHECK-PPC64LE:       # %bb.0:
 ; CHECK-PPC64LE-NEXT:    subc 3, 4, 3
-; CHECK-PPC64LE-NEXT:    subfe 3, 4, 4
-; CHECK-PPC64LE-NEXT:    sub 4, 5, 6
-; CHECK-PPC64LE-NEXT:    neg 3, 3
-; CHECK-PPC64LE-NEXT:    sub 3, 4, 3
+; CHECK-PPC64LE-NEXT:    subfe 3, 6, 5
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC32-LABEL: test_ugt_i64:
@@ -396,13 +375,12 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
 ; CHECK-PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-PPC32-NEXT:    .cfi_offset lr, 4
 ; CHECK-PPC32-NEXT:    .cfi_offset r30, -8
-; CHECK-PPC32-NEXT:    cmplw 3, 4
+; CHECK-PPC32-NEXT:    subc 3, 3, 4
 ; CHECK-PPC32-NEXT:    li 3, 0
-; CHECK-PPC32-NEXT:    li 4, 1
+; CHECK-PPC32-NEXT:    sub 5, 5, 6
 ; CHECK-PPC32-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
-; CHECK-PPC32-NEXT:    isellt 4, 4, 3
-; CHECK-PPC32-NEXT:    sub 3, 5, 6
-; CHECK-PPC32-NEXT:    sub 30, 3, 4
+; CHECK-PPC32-NEXT:    subfe 30, 3, 5
+; CHECK-PPC32-NEXT:    mr 3, 5
 ; CHECK-PPC32-NEXT:    bl use
 ; CHECK-PPC32-NEXT:    mr 3, 30
 ; CHECK-PPC32-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload



More information about the llvm-commits mailing list