[llvm] [PPC] Combine sub with carry to SUBE (PR #185671)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 08:34:43 PDT 2026
https://github.com/SiliconA-Z created https://github.com/llvm/llvm-project/pull/185671
None
>From 3374087dd1caa49a698f503ae2a8209ba17885a9 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 10 Mar 2026 10:39:01 -0400
Subject: [PATCH 1/2] Create subfe-combine.ll
---
llvm/test/CodeGen/PowerPC/subfe-combine.ll | 446 +++++++++++++++++++++
1 file changed, 446 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/subfe-combine.ll
diff --git a/llvm/test/CodeGen/PowerPC/subfe-combine.ll b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
new file mode 100644
index 0000000000000..40d1f1fe7ad73
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck --check-prefix=CHECK-PPC64LE %s
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr7 | FileCheck --check-prefix=CHECK-PPC32 %s
+
+; Basic i32 pattern: (x - y) - zext(a <u b).
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_basic_i32:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_basic_i32:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 4, 5, 6
+; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+; Borrow only, with no inner subtraction: x - zext(a <u b).
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_only_borrow:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 5, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_only_borrow:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 3, 5, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ ret i32 %res
+}
+
+; The borrow written as an addition of a sign-extended compare:
+; (x - y) + sext(a <u b).
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_sext_add:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_sext_add:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: sub 3, 5, 6
+; CHECK-PPC32-NEXT: li 4, 0
+; CHECK-PPC32-NEXT: li 5, 1
+; CHECK-PPC32-NEXT: isellt 4, 5, 4
+; CHECK-PPC32-NEXT: sub 3, 3, 4
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = sext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = add i32 %sub, %carry
+ ret i32 %res
+}
+
+; i32 pattern with the compare operands in ugt form: (x - y) - zext(a >u b).
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_ugt:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_ugt:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: iselgt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 4, 5, 6
+; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ugt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+; Basic i64 pattern: (x - y) - zext(a <u b).
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-PPC64LE-LABEL: test_basic_i64:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: subc 4, 3, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: subfe 3, 3, 3
+; CHECK-PPC64LE-NEXT: neg 3, 3
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_basic_i64:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 5
+; CHECK-PPC32-NEXT: cmplw 1, 4, 6
+; CHECK-PPC32-NEXT: li 3, 1
+; CHECK-PPC32-NEXT: subc 4, 8, 10
+; CHECK-PPC32-NEXT: crandc 20, 0, 2
+; CHECK-PPC32-NEXT: crand 21, 2, 4
+; CHECK-PPC32-NEXT: li 5, 0
+; CHECK-PPC32-NEXT: subfe 6, 9, 7
+; CHECK-PPC32-NEXT: crnor 20, 21, 20
+; CHECK-PPC32-NEXT: isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT: subc 4, 4, 3
+; CHECK-PPC32-NEXT: subfe 3, 5, 6
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+; i64 borrow only, with no inner subtraction: x - zext(a <u b).
+define i64 @test_only_borrow_i64(i64 %a, i64 %b, i64 %x) {
+; CHECK-PPC64LE-LABEL: test_only_borrow_i64:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: subc 4, 3, 4
+; CHECK-PPC64LE-NEXT: subfe 3, 3, 3
+; CHECK-PPC64LE-NEXT: neg 3, 3
+; CHECK-PPC64LE-NEXT: sub 3, 5, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_only_borrow_i64:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 5
+; CHECK-PPC32-NEXT: cmplw 1, 4, 6
+; CHECK-PPC32-NEXT: li 3, 1
+; CHECK-PPC32-NEXT: li 5, 0
+; CHECK-PPC32-NEXT: crandc 20, 0, 2
+; CHECK-PPC32-NEXT: crand 21, 2, 4
+; CHECK-PPC32-NEXT: crnor 20, 21, 20
+; CHECK-PPC32-NEXT: isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT: subc 4, 8, 3
+; CHECK-PPC32-NEXT: subfe 3, 5, 7
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %res = sub i64 %x, %carry
+ ret i64 %res
+}
+
+; i64 pattern with the compare in ugt form: (x - y) - zext(a >u b).
+define i64 @test_ugt_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-PPC64LE-LABEL: test_ugt_i64:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: subc 3, 4, 3
+; CHECK-PPC64LE-NEXT: subfe 3, 4, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: neg 3, 3
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_ugt_i64:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 5
+; CHECK-PPC32-NEXT: cmplw 1, 4, 6
+; CHECK-PPC32-NEXT: li 3, 1
+; CHECK-PPC32-NEXT: subc 4, 8, 10
+; CHECK-PPC32-NEXT: crandc 20, 1, 2
+; CHECK-PPC32-NEXT: crand 21, 2, 5
+; CHECK-PPC32-NEXT: li 5, 0
+; CHECK-PPC32-NEXT: subfe 6, 9, 7
+; CHECK-PPC32-NEXT: crnor 20, 21, 20
+; CHECK-PPC32-NEXT: isel 3, 0, 3, 20
+; CHECK-PPC32-NEXT: subc 4, 4, 3
+; CHECK-PPC32-NEXT: subfe 3, 5, 6
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ugt i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+; Negative test: a signed comparison (slt) should not be combined.
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_unsupported_cc_slt:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: extsw 3, 3
+; CHECK-PPC64LE-NEXT: extsw 4, 4
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_unsupported_cc_slt:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmpw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 4, 5, 6
+; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp slt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+; Negative test: a signed comparison (sgt) should not be combined.
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_unsupported_cc_sgt:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: extsw 4, 4
+; CHECK-PPC64LE-NEXT: extsw 3, 3
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_unsupported_cc_sgt:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmpw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: iselgt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 4, 5, 6
+; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp sgt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+; Negative test: the icmp result is also passed to @use, and multiple uses of
+; the setcc prevent the combine.
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_multiple_setcc_uses:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: mflr 0
+; CHECK-PPC64LE-NEXT: .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT: .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT: .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT: stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: std 0, 64(1)
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 30, 5, 3
+; CHECK-PPC64LE-NEXT: bl use
+; CHECK-PPC64LE-NEXT: nop
+; CHECK-PPC64LE-NEXT: mr 3, 30
+; CHECK-PPC64LE-NEXT: addi 1, 1, 48
+; CHECK-PPC64LE-NEXT: ld 0, 16(1)
+; CHECK-PPC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT: mtlr 0
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_multiple_setcc_uses:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: mflr 0
+; CHECK-PPC32-NEXT: stwu 1, -16(1)
+; CHECK-PPC32-NEXT: stw 0, 20(1)
+; CHECK-PPC32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT: .cfi_offset lr, 4
+; CHECK-PPC32-NEXT: .cfi_offset r30, -8
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 30, 5, 3
+; CHECK-PPC32-NEXT: bl use
+; CHECK-PPC32-NEXT: mr 3, 30
+; CHECK-PPC32-NEXT: lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT: lwz 0, 20(1)
+; CHECK-PPC32-NEXT: addi 1, 1, 16
+; CHECK-PPC32-NEXT: mtlr 0
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i1 %cc)
+ ret i32 %res
+}
+
+; Negative test: the zero-extended carry is also passed to @use, and multiple
+; uses of the carry prevent the combine.
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-PPC64LE-LABEL: test_multiple_carry_uses:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: mflr 0
+; CHECK-PPC64LE-NEXT: .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT: .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT: .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT: stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: std 0, 64(1)
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 30, 5, 3
+; CHECK-PPC64LE-NEXT: bl use
+; CHECK-PPC64LE-NEXT: nop
+; CHECK-PPC64LE-NEXT: mr 3, 30
+; CHECK-PPC64LE-NEXT: addi 1, 1, 48
+; CHECK-PPC64LE-NEXT: ld 0, 16(1)
+; CHECK-PPC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT: mtlr 0
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_multiple_carry_uses:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: mflr 0
+; CHECK-PPC32-NEXT: stwu 1, -16(1)
+; CHECK-PPC32-NEXT: stw 0, 20(1)
+; CHECK-PPC32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT: .cfi_offset lr, 4
+; CHECK-PPC32-NEXT: .cfi_offset r30, -8
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: sub 30, 5, 3
+; CHECK-PPC32-NEXT: bl use
+; CHECK-PPC32-NEXT: mr 3, 30
+; CHECK-PPC32-NEXT: lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT: lwz 0, 20(1)
+; CHECK-PPC32-NEXT: addi 1, 1, 16
+; CHECK-PPC32-NEXT: mtlr 0
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i32 %carry)
+ ret i32 %res
+}
+
+; The inner sub (x - y) is also passed to @use, so it has multiple uses; we can
+; still combine.
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-PPC64LE-LABEL: test_multiple_sub_uses:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: mflr 0
+; CHECK-PPC64LE-NEXT: .cfi_def_cfa_offset 48
+; CHECK-PPC64LE-NEXT: .cfi_offset lr, 16
+; CHECK-PPC64LE-NEXT: .cfi_offset r30, -16
+; CHECK-PPC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-PPC64LE-NEXT: stdu 1, -48(1)
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: std 0, 64(1)
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: sub 4, 5, 6
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: sub 30, 4, 3
+; CHECK-PPC64LE-NEXT: clrldi 3, 4, 32
+; CHECK-PPC64LE-NEXT: bl use
+; CHECK-PPC64LE-NEXT: nop
+; CHECK-PPC64LE-NEXT: mr 3, 30
+; CHECK-PPC64LE-NEXT: addi 1, 1, 48
+; CHECK-PPC64LE-NEXT: ld 0, 16(1)
+; CHECK-PPC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-PPC64LE-NEXT: mtlr 0
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_multiple_sub_uses:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: mflr 0
+; CHECK-PPC32-NEXT: stwu 1, -16(1)
+; CHECK-PPC32-NEXT: stw 0, 20(1)
+; CHECK-PPC32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-PPC32-NEXT: .cfi_offset lr, 4
+; CHECK-PPC32-NEXT: .cfi_offset r30, -8
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: stw 30, 8(1) # 4-byte Folded Spill
+; CHECK-PPC32-NEXT: isellt 4, 4, 3
+; CHECK-PPC32-NEXT: sub 3, 5, 6
+; CHECK-PPC32-NEXT: sub 30, 3, 4
+; CHECK-PPC32-NEXT: bl use
+; CHECK-PPC32-NEXT: mr 3, 30
+; CHECK-PPC32-NEXT: lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK-PPC32-NEXT: lwz 0, 20(1)
+; CHECK-PPC32-NEXT: addi 1, 1, 16
+; CHECK-PPC32-NEXT: mtlr 0
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ tail call void @use(i32 %sub)
+ ret i32 %res
+}
+
+; Negative test: sub from zero should not combine (the result is just the
+; negated carry).
+define i32 @test_sub_from_zero(i32 %a, i32 %b) {
+; CHECK-PPC64LE-LABEL: test_sub_from_zero:
+; CHECK-PPC64LE: # %bb.0:
+; CHECK-PPC64LE-NEXT: clrldi 3, 3, 32
+; CHECK-PPC64LE-NEXT: clrldi 4, 4, 32
+; CHECK-PPC64LE-NEXT: sub 3, 3, 4
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 1, 63
+; CHECK-PPC64LE-NEXT: neg 3, 3
+; CHECK-PPC64LE-NEXT: blr
+;
+; CHECK-PPC32-LABEL: test_sub_from_zero:
+; CHECK-PPC32: # %bb.0:
+; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: li 3, 0
+; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: isellt 3, 4, 3
+; CHECK-PPC32-NEXT: neg 3, 3
+; CHECK-PPC32-NEXT: blr
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 0, %carry
+ ret i32 %res
+}
+
+declare void @use(...)
>From 2a6b25b02404916d04d651358f4e0fc3d14cc166 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 10 Mar 2026 11:06:58 -0400
Subject: [PATCH 2/2] [PPC] Combine sub with carry to SUBE
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 73 ++++++++++++++++++++-
llvm/test/CodeGen/PowerPC/subfe-combine.ll | 58 +++++-----------
2 files changed, 90 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 479c362832234..b5ec84ceae186 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1436,7 +1436,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
+ setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SUB, ISD::XOR, ISD::SHL, ISD::SRA,
ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
@@ -17233,6 +17233,73 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
return true;
}
+// Attempt to fold a subtraction of an unsigned-compare result into a
+// subtract-with-borrow sequence. Patterns handled (the ZEXT is looked through
+// when present and single-use):
+//   SUB x, (ZEXT (SETCC a, b, ult))          -> SUBE x, 0, (SUBC a, b)
+//   SUB (SUB x, y), (ZEXT (SETCC a, b, ult)) -> SUBE x, y, (SUBC a, b)
+//   SUB x, (ZEXT (SETCC a, b, ugt))          -> SUBE x, 0, (SUBC b, a)
+//   SUB (SUB x, y), (ZEXT (SETCC a, b, ugt)) -> SUBE x, y, (SUBC b, a)
+// PPC carry (CA) is inverted: SUBFC sets CA=1 when there is NO borrow.
+// SUBFE computes ~RA + RB + CA. When CA=0 (borrow), this gives RB-RA-1,
+// which is exactly "subtract with borrow".
+//
+// Returns the value result of the new SUBE node, or an empty SDValue when N
+// does not match: wrong opcode, a type other than i64 on PPC64 / i32
+// otherwise, a compare (or its zext) with more than one use, a condition code
+// other than ult/ugt, compare operands whose type differs from the
+// subtraction type, or a subtraction from constant zero.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG,
+                                           const PPCSubtarget &Subtarget) {
+  if (N->getOpcode() != ISD::SUB)
+    return SDValue();
+
+  // Only i64 on PPC64 and i32 on 32-bit subtargets are handled.
+  EVT VT = N->getValueType(0);
+  if (VT != (Subtarget.isPPC64() ? MVT::i64 : MVT::i32))
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Look through ZERO_EXTEND.
+  if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+    N1 = N1.getOperand(0);
+
+  // The compare is replaced by a SUBC, so it must have no other users.
+  if (!N1.hasOneUse())
+    return SDValue();
+
+  // Only match pre-legalization ISD::SETCC with unsigned conditions.
+  if (N1.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+  SDValue LHS = N1.getOperand(0);
+  SDValue RHS = N1.getOperand(1);
+
+  // The compared values become the operands of a SUBC whose value type is VT,
+  // so they must already have that type. Without this check, a compare of a
+  // different width (e.g. an i32 compare feeding an i64 subtraction) would
+  // create a SUBC whose operand types do not match its result type.
+  if (LHS.getValueType() != VT)
+    return SDValue();
+
+  if (CC == ISD::SETUGT) {
+    // ugt a, b -> ult b, a (swap operands for SUBC)
+    std::swap(LHS, RHS);
+  } else if (CC != ISD::SETULT) {
+    return SDValue();
+  }
+
+  // Don't combine SUB 0, (ZEXT SETCC) -- that's just negating the borrow.
+  if (isNullConstant(N0))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  // Default to "x - 0 - borrow"; fold a single-use inner SUB into the SUBE
+  // operands so that "(x - y) - borrow" becomes one instruction.
+  SDValue N0LHS = N0;
+  SDValue N0RHS = DAG.getConstant(0, DL, VT);
+  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
+    N0LHS = N0.getOperand(0);
+    N0RHS = N0.getOperand(1);
+  }
+
+  // Both new nodes produce a VT value plus an i32 carry result.
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+  // Generate SUBC to set carry: CA = (LHS >= RHS), i.e. CA=0 when LHS < RHS.
+  // Only the carry result (value #1) is used.
+  SDValue Borrow = DAG.getNode(PPCISD::SUBC, DL, VTs, LHS, RHS);
+  SDValue CarryFlag = Borrow.getValue(1);
+
+  // Generate SUBE: result = ~Y + X + CA = X - Y - !CA.
+  // When CA=0 (borrow), this is X - Y - 1, which is the desired borrow.
+  SDValue SubE = DAG.getNode(PPCISD::SUBE, DL, VTs, N0LHS, N0RHS, CarryFlag);
+
+  return SubE.getValue(0);
+}
+
static SDValue DAGCombineAddc(SDNode *N,
llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
@@ -17414,6 +17481,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD:
return combineADD(N, DCI);
+ case ISD::SUB:
+ if (SDValue Val = performSubWithBorrowCombine(N, DAG, Subtarget))
+ return Val;
+ break;
case ISD::AND: {
// We don't want (and (zext (shift...)), C) if C fits in the width of the
// original input as that will prevent us from selecting optimal rotates.
diff --git a/llvm/test/CodeGen/PowerPC/subfe-combine.ll b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
index 40d1f1fe7ad73..5337ed17eff9c 100644
--- a/llvm/test/CodeGen/PowerPC/subfe-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/subfe-combine.ll
@@ -15,12 +15,8 @@ define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
;
; CHECK-PPC32-LABEL: test_basic_i32:
; CHECK-PPC32: # %bb.0:
-; CHECK-PPC32-NEXT: cmplw 3, 4
-; CHECK-PPC32-NEXT: li 3, 0
-; CHECK-PPC32-NEXT: li 4, 1
-; CHECK-PPC32-NEXT: isellt 3, 4, 3
-; CHECK-PPC32-NEXT: sub 4, 5, 6
-; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: subc 3, 3, 4
+; CHECK-PPC32-NEXT: subfe 3, 6, 5
; CHECK-PPC32-NEXT: blr
%cc = icmp ult i32 %a, %b
%carry = zext i1 %cc to i32
@@ -41,11 +37,9 @@ define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
;
; CHECK-PPC32-LABEL: test_only_borrow:
; CHECK-PPC32: # %bb.0:
-; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: subc 3, 3, 4
; CHECK-PPC32-NEXT: li 3, 0
-; CHECK-PPC32-NEXT: li 4, 1
-; CHECK-PPC32-NEXT: isellt 3, 4, 3
-; CHECK-PPC32-NEXT: sub 3, 5, 3
+; CHECK-PPC32-NEXT: subfe 3, 3, 5
; CHECK-PPC32-NEXT: blr
%cc = icmp ult i32 %a, %b
%carry = zext i1 %cc to i32
@@ -66,12 +60,8 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
;
; CHECK-PPC32-LABEL: test_sext_add:
; CHECK-PPC32: # %bb.0:
-; CHECK-PPC32-NEXT: cmplw 3, 4
-; CHECK-PPC32-NEXT: sub 3, 5, 6
-; CHECK-PPC32-NEXT: li 4, 0
-; CHECK-PPC32-NEXT: li 5, 1
-; CHECK-PPC32-NEXT: isellt 4, 5, 4
-; CHECK-PPC32-NEXT: sub 3, 3, 4
+; CHECK-PPC32-NEXT: subc 3, 3, 4
+; CHECK-PPC32-NEXT: subfe 3, 6, 5
; CHECK-PPC32-NEXT: blr
%cc = icmp ult i32 %a, %b
%carry = sext i1 %cc to i32
@@ -93,12 +83,8 @@ define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
;
; CHECK-PPC32-LABEL: test_ugt:
; CHECK-PPC32: # %bb.0:
-; CHECK-PPC32-NEXT: cmplw 3, 4
-; CHECK-PPC32-NEXT: li 3, 0
-; CHECK-PPC32-NEXT: li 4, 1
-; CHECK-PPC32-NEXT: iselgt 3, 4, 3
-; CHECK-PPC32-NEXT: sub 4, 5, 6
-; CHECK-PPC32-NEXT: sub 3, 4, 3
+; CHECK-PPC32-NEXT: subc 3, 4, 3
+; CHECK-PPC32-NEXT: subfe 3, 6, 5
; CHECK-PPC32-NEXT: blr
%cc = icmp ugt i32 %a, %b
%carry = zext i1 %cc to i32
@@ -110,11 +96,8 @@ define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
; CHECK-PPC64LE-LABEL: test_basic_i64:
; CHECK-PPC64LE: # %bb.0:
-; CHECK-PPC64LE-NEXT: subc 4, 3, 4
-; CHECK-PPC64LE-NEXT: sub 4, 5, 6
-; CHECK-PPC64LE-NEXT: subfe 3, 3, 3
-; CHECK-PPC64LE-NEXT: neg 3, 3
-; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: subc 3, 3, 4
+; CHECK-PPC64LE-NEXT: subfe 3, 6, 5
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC32-LABEL: test_basic_i64:
@@ -142,10 +125,9 @@ define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
define i64 @test_only_borrow_i64(i64 %a, i64 %b, i64 %x) {
; CHECK-PPC64LE-LABEL: test_only_borrow_i64:
; CHECK-PPC64LE: # %bb.0:
-; CHECK-PPC64LE-NEXT: subc 4, 3, 4
-; CHECK-PPC64LE-NEXT: subfe 3, 3, 3
-; CHECK-PPC64LE-NEXT: neg 3, 3
-; CHECK-PPC64LE-NEXT: sub 3, 5, 3
+; CHECK-PPC64LE-NEXT: subc 3, 3, 4
+; CHECK-PPC64LE-NEXT: li 3, 0
+; CHECK-PPC64LE-NEXT: subfe 3, 3, 5
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC32-LABEL: test_only_borrow_i64:
@@ -171,10 +153,7 @@ define i64 @test_ugt_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
; CHECK-PPC64LE-LABEL: test_ugt_i64:
; CHECK-PPC64LE: # %bb.0:
; CHECK-PPC64LE-NEXT: subc 3, 4, 3
-; CHECK-PPC64LE-NEXT: subfe 3, 4, 4
-; CHECK-PPC64LE-NEXT: sub 4, 5, 6
-; CHECK-PPC64LE-NEXT: neg 3, 3
-; CHECK-PPC64LE-NEXT: sub 3, 4, 3
+; CHECK-PPC64LE-NEXT: subfe 3, 6, 5
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC32-LABEL: test_ugt_i64:
@@ -396,13 +375,12 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
; CHECK-PPC32-NEXT: .cfi_def_cfa_offset 16
; CHECK-PPC32-NEXT: .cfi_offset lr, 4
; CHECK-PPC32-NEXT: .cfi_offset r30, -8
-; CHECK-PPC32-NEXT: cmplw 3, 4
+; CHECK-PPC32-NEXT: subc 3, 3, 4
; CHECK-PPC32-NEXT: li 3, 0
-; CHECK-PPC32-NEXT: li 4, 1
+; CHECK-PPC32-NEXT: sub 5, 5, 6
; CHECK-PPC32-NEXT: stw 30, 8(1) # 4-byte Folded Spill
-; CHECK-PPC32-NEXT: isellt 4, 4, 3
-; CHECK-PPC32-NEXT: sub 3, 5, 6
-; CHECK-PPC32-NEXT: sub 30, 3, 4
+; CHECK-PPC32-NEXT: subfe 30, 3, 5
+; CHECK-PPC32-NEXT: mr 3, 5
; CHECK-PPC32-NEXT: bl use
; CHECK-PPC32-NEXT: mr 3, 30
; CHECK-PPC32-NEXT: lwz 30, 8(1) # 4-byte Folded Reload
More information about the llvm-commits
mailing list