[llvm] [X86] fuse constant addition after sbb (PR #184541)
Takashi Idobe via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 18:48:20 PDT 2026
https://github.com/Takashiidobe updated https://github.com/llvm/llvm-project/pull/184541
>From 877ed909ddf9a9ea3780d72693a009c598088ba8 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Fri, 6 Mar 2026 16:47:08 -0500
Subject: [PATCH 1/4] fuse constant addition after sbb for x86 only
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ebfd5defdc40..e57e1dbdfda3e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59647,6 +59647,22 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
Op0.getOperand(0), Op0.getOperand(2));
}
+ // Fold ADD(SBB(Y,0,W),C) -> SBB(Y,-C,W)
+ // SBB(Y,0,W) = Y - 0 - CF = Y - CF; adding C gives Y - CF + C = Y - (-C) -
+ // CF. The SBB flags output must be dead: changing the subtrahend from 0 to -C
+ // produces different EFLAGS bits.
+ if (Op0.getOpcode() == X86ISD::SBB && Op0->hasOneUse() &&
+ X86::isZeroNode(Op0.getOperand(1)) && !Op0->hasAnyUseOfValue(1)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
+ SDLoc SBBLoc(Op0);
+ return DAG
+ .getNode(X86ISD::SBB, SBBLoc, Op0->getVTList(), Op0.getOperand(0),
+ DAG.getConstant(-C->getAPIntValue(), SBBLoc, VT),
+ Op0.getOperand(2))
+ .getValue(0);
+ }
+ }
+
if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget))
return IFMA52;
>From e6dbeb44fa5222171f135965dbed6509f724b9b3 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 10:28:04 -0500
Subject: [PATCH 2/4] add tests and fix up test breakage due to SBB change
---
.../CodeGen/X86/apx/long-instruction-fixup.ll | 8 +-
llvm/test/CodeGen/X86/apx/sbb.ll | 54 +++++-------
llvm/test/CodeGen/X86/sbb-add-constant.ll | 84 +++++++++++++++++++
llvm/test/CodeGen/X86/select_const.ll | 4 +-
4 files changed, 110 insertions(+), 40 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/sbb-add-constant.ll
diff --git a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
index 30c485836797f..839ec45051367 100644
--- a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
+++ b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
@@ -169,8 +169,8 @@ define i32 @sbb32mi_GS(i32 %x, i32 %y) {
; CHECK-LABEL: sbb32mi_GS:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl %edi, %esi
-; CHECK-NEXT: sbbl $0, %gs:255, %eax
-; CHECK-NEXT: addl $-123456, %eax # imm = 0xFFFE1DC0
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: sbbl $123456, %eax # imm = 0x1E240
; CHECK-NEXT: retq
entry:
%a= inttoptr i32 255 to ptr addrspace(256)
@@ -186,8 +186,8 @@ define i64 @sbb64mi_FS(i64 %x, i64 %y) {
; CHECK-LABEL: sbb64mi_FS:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpq %rdi, %rsi
-; CHECK-NEXT: sbbq $0, %fs:255, %rax
-; CHECK-NEXT: addq $-123456, %rax # imm = 0xFFFE1DC0
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: sbbq $123456, %rax # imm = 0x1E240
; CHECK-NEXT: retq
entry:
%a= inttoptr i64 255 to ptr addrspace(257)
diff --git a/llvm/test/CodeGen/X86/apx/sbb.ll b/llvm/test/CodeGen/X86/apx/sbb.ll
index a67419bbd5db7..0b009c6cbcdb5 100644
--- a/llvm/test/CodeGen/X86/apx/sbb.ll
+++ b/llvm/test/CodeGen/X86/apx/sbb.ll
@@ -113,8 +113,7 @@ define i16 @sbb16ri8(i16 %a, i16 %x, i16 %y) nounwind {
; CHECK-LABEL: sbb16ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT: sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85]
+; CHECK-NEXT: sbbw $123, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i16 %a, 123
%k = icmp ugt i16 %x, %y
@@ -127,8 +126,7 @@ define i32 @sbb32ri8(i32 %a, i32 %x, i32 %y) nounwind {
; CHECK-LABEL: sbb32ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT: sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85]
+; CHECK-NEXT: sbbl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i32 %a, 123
%k = icmp ugt i32 %x, %y
@@ -141,8 +139,7 @@ define i64 @sbb64ri8(i64 %a, i64 %x, i64 %y) nounwind {
; CHECK-LABEL: sbb64ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85]
+; CHECK-NEXT: sbbq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i64 %a, 123
%k = icmp ugt i64 %x, %y
@@ -155,8 +152,7 @@ define i8 @sbb8ri(i8 %a, i8 %x, i8 %y) nounwind {
; CHECK-LABEL: sbb8ri:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2]
-; CHECK-NEXT: sbbb $0, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x00]
-; CHECK-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85]
+; CHECK-NEXT: sbbb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i8 %a, 123
%k = icmp ugt i8 %x, %y
@@ -169,9 +165,8 @@ define i16 @sbb16ri(i16 %a, i16 %x, i16 %y) nounwind {
; CHECK-LABEL: sbb16ri:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT: sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb]
-; CHECK-NEXT: # imm = 0xFB2E
+; CHECK-NEXT: sbbw $1234, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0xdf,0xd2,0x04]
+; CHECK-NEXT: # imm = 0x4D2
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i16 %a, 1234
%k = icmp ugt i16 %x, %y
@@ -184,9 +179,8 @@ define i32 @sbb32ri(i32 %a, i32 %x, i32 %y) nounwind {
; CHECK-LABEL: sbb32ri:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT: sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT: # imm = 0xFFFE1DC0
+; CHECK-NEXT: sbbl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i32 %a, 123456
%k = icmp ugt i32 %x, %y
@@ -199,9 +193,8 @@ define i64 @sbb64ri(i64 %a, i64 %x, i64 %y) nounwind {
; CHECK-LABEL: sbb64ri:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT: addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT: # imm = 0xFFFE1DC0
+; CHECK-NEXT: sbbq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: retq # encoding: [0xc3]
%s = sub i64 %a, 123456
%k = icmp ugt i64 %x, %y
@@ -270,8 +263,7 @@ define i16 @sbb16mi8(ptr %ptr, i16 %x, i16 %y) nounwind {
; CHECK-LABEL: sbb16mi8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT: sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85]
+; CHECK-NEXT: sbbw $123, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i16, ptr %ptr
%s = sub i16 %a, 123
@@ -285,8 +277,7 @@ define i32 @sbb32mi8(ptr %ptr, i32 %x, i32 %y) nounwind {
; CHECK-LABEL: sbb32mi8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT: sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85]
+; CHECK-NEXT: sbbl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
%s = sub i32 %a, 123
@@ -300,8 +291,7 @@ define i64 @sbb64mi8(ptr %ptr, i64 %x, i64 %y) nounwind {
; CHECK-LABEL: sbb64mi8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85]
+; CHECK-NEXT: sbbq $123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
%s = sub i64 %a, 123
@@ -315,8 +305,7 @@ define i8 @sbb8mi(ptr %ptr, i8 %x, i8 %y) nounwind {
; CHECK-LABEL: sbb8mi:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2]
-; CHECK-NEXT: sbbb $0, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x00]
-; CHECK-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85]
+; CHECK-NEXT: sbbb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i8, ptr %ptr
%s = sub i8 %a, 123
@@ -330,9 +319,8 @@ define i16 @sbb16mi(ptr %ptr, i16 %x, i16 %y) nounwind {
; CHECK-LABEL: sbb16mi:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT: sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb]
-; CHECK-NEXT: # imm = 0xFB2E
+; CHECK-NEXT: sbbw $1234, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0x1f,0xd2,0x04]
+; CHECK-NEXT: # imm = 0x4D2
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i16, ptr %ptr
%s = sub i16 %a, 1234
@@ -346,9 +334,8 @@ define i32 @sbb32mi(ptr %ptr, i32 %x, i32 %y) nounwind {
; CHECK-LABEL: sbb32mi:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT: sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT: # imm = 0xFFFE1DC0
+; CHECK-NEXT: sbbl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
%s = sub i32 %a, 123456
@@ -362,9 +349,8 @@ define i64 @sbb64mi(ptr %ptr, i64 %x, i64 %y) nounwind {
; CHECK-LABEL: sbb64mi:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT: addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT: # imm = 0xFFFE1DC0
+; CHECK-NEXT: sbbq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
%s = sub i64 %a, 123456
diff --git a/llvm/test/CodeGen/X86/sbb-add-constant.ll b/llvm/test/CodeGen/X86/sbb-add-constant.ll
new file mode 100644
index 0000000000000..e6e95029fa3b2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sbb-add-constant.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+;
+; Verify that ADD(SBB(Y,0,flags),C) folds to SBB(Y,-C,flags).
+; SBB(Y,0) = Y - CF; adding C gives Y - CF + C = Y - (-C) - CF = SBB(Y,-C).
+;
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
+declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
+
+; Fold should fire because all conditions are met
+define i64 @g_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: g_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: subq %rsi, %rax
+; CHECK-NEXT: sbbq $-10, %rax
+; CHECK-NEXT: retq
+ %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %val = extractvalue { i64, i1 } %ov, 0
+ %bit = extractvalue { i64, i1 } %ov, 1
+ %ext = sext i1 %bit to i64
+ %r = add i64 %val, %ext
+ %r2 = add i64 %r, 10
+ ret i64 %r2
+}
+
+; Non-constant addend, fold should not fire
+define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: g_nonconstant:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: leaq (%rdi,%rdx), %rax
+; CHECK-NEXT: retq
+ %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %val = extractvalue { i64, i1 } %ov, 0
+ %bit = extractvalue { i64, i1 } %ov, 1
+ %ext = sext i1 %bit to i64
+ %r = add i64 %val, %ext
+ %r2 = add i64 %r, %c
+ ret i64 %r2
+}
+
+; Multiple uses of SBB result, fold should not fire
+define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
+; CHECK-LABEL: g_multi_use:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: movq %rdi, (%rdx)
+; CHECK-NEXT: leaq 10(%rdi), %rax
+; CHECK-NEXT: retq
+ %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %val = extractvalue { i64, i1 } %ov, 0
+ %bit = extractvalue { i64, i1 } %ov, 1
+ %ext = sext i1 %bit to i64
+ %sbb = add i64 %val, %ext
+ store i64 %sbb, ptr %out
+ %r = add i64 %sbb, 10
+ ret i64 %r
+}
+
+; Flags live across the low-limb constant add into the next SBB in the chain.
+; Fold should not fire.
+define {i64, i64} @g_flags_live(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) {
+; CHECK-LABEL: g_flags_live:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq %rdx, %rdi
+; CHECK-NEXT: leaq 10(%rdi), %rax
+; CHECK-NEXT: sbbq %rcx, %rsi
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: retq
+ %lo = call { i8, i64 } @llvm.x86.subborrow.64(i8 0, i64 %a_lo, i64 %b_lo)
+ %lo_b = extractvalue { i8, i64 } %lo, 0
+ %lo_val = extractvalue { i8, i64 } %lo, 1
+ %lo_plus = add i64 %lo_val, 10
+
+ %hi = call { i8, i64 } @llvm.x86.subborrow.64(i8 %lo_b, i64 %a_hi, i64 %b_hi)
+ %hi_val = extractvalue { i8, i64 } %hi, 1
+
+ %ret = insertvalue {i64, i64} poison, i64 %lo_plus, 0
+ %ret2 = insertvalue {i64, i64} %ret, i64 %hi_val, 1
+ ret {i64, i64} %ret2
+}
diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll
index 35f4655dd6d7c..a7da07f1ae5df 100644
--- a/llvm/test/CodeGen/X86/select_const.ll
+++ b/llvm/test/CodeGen/X86/select_const.ll
@@ -439,9 +439,9 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
;
; X64-LABEL: sel_1_2:
; X64: # %bb.0:
+; X64-NEXT: movq %rsi, %rax
; X64-NEXT: cmpq $42, %rdi
-; X64-NEXT: sbbq $0, %rsi
-; X64-NEXT: leaq 2(%rsi), %rax
+; X64-NEXT: sbbq $-2, %rax
; X64-NEXT: retq
%cmp = icmp ult i64 %x, 42
%sel = select i1 %cmp, i64 1, i64 2
>From 56bbcbc1a782c6632944787a7c5b57a2a1ad1345 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Mon, 9 Mar 2026 21:47:37 -0400
Subject: [PATCH 3/4] allow x86 implementation of sbb fold to also take a
non-constant operand and handle swapping operands if necessary
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e57e1dbdfda3e..6eb6410960af7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59651,16 +59651,17 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
// SBB(Y,0,W) = Y - 0 - CF = Y - CF; adding C gives Y - CF + C = Y - (-C) -
// CF. The SBB flags output must be dead: changing the subtrahend from 0 to -C
// produces different EFLAGS bits.
- if (Op0.getOpcode() == X86ISD::SBB && Op0->hasOneUse() &&
- X86::isZeroNode(Op0.getOperand(1)) && !Op0->hasAnyUseOfValue(1)) {
- if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
- SDLoc SBBLoc(Op0);
- return DAG
- .getNode(X86ISD::SBB, SBBLoc, Op0->getVTList(), Op0.getOperand(0),
- DAG.getConstant(-C->getAPIntValue(), SBBLoc, VT),
- Op0.getOperand(2))
- .getValue(0);
- }
+ SDValue SBB = Op0;
+ SDValue C = Op1;
+ if (SBB.getOpcode() != X86ISD::SBB)
+ std::swap(SBB, C);
+ if (SBB.getOpcode() == X86ISD::SBB && SBB->hasOneUse() &&
+ X86::isZeroNode(SBB.getOperand(1)) && !SBB->hasAnyUseOfValue(1)) {
+ SDLoc SBBLoc(SBB);
+ return DAG
+ .getNode(X86ISD::SBB, SBBLoc, SBB->getVTList(), SBB.getOperand(0),
+ DAG.getNegative(C, SBBLoc, VT), SBB.getOperand(2))
+ .getValue(0);
}
if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget))
>From b5b37dfada73e7db3e88839fd9f1c693da181513 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Mon, 9 Mar 2026 21:47:50 -0400
Subject: [PATCH 4/4] add tests for sbb fold
---
llvm/test/CodeGen/X86/sbb-add-constant.ll | 119 ++++++++++++++++++++--
1 file changed, 113 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/X86/sbb-add-constant.ll b/llvm/test/CodeGen/X86/sbb-add-constant.ll
index e6e95029fa3b2..5a4f547ca5894 100644
--- a/llvm/test/CodeGen/X86/sbb-add-constant.ll
+++ b/llvm/test/CodeGen/X86/sbb-add-constant.ll
@@ -4,8 +4,6 @@
; Verify that ADD(SBB(Y,0,flags),C) folds to SBB(Y,-C,flags).
; SBB(Y,0) = Y - CF; adding C gives Y - CF + C = Y - (-C) - CF = SBB(Y,-C).
;
-declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
-declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
; Fold should fire because all conditions are met
define i64 @g_i64(i64 %a, i64 %b) {
@@ -24,13 +22,31 @@ define i64 @g_i64(i64 %a, i64 %b) {
ret i64 %r2
}
-; Non-constant addend, fold should not fire
+; Fold should fire because all conditions are met
+define i32 @g_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: g_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: subl %esi, %eax
+; CHECK-NEXT: sbbl $-10, %eax
+; CHECK-NEXT: retq
+ %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %val = extractvalue { i32, i1 } %ov, 0
+ %bit = extractvalue { i32, i1 } %ov, 1
+ %ext = sext i1 %bit to i32
+ %r = add i32 %val, %ext
+ %r2 = add i32 %r, 10
+ ret i32 %r2
+}
+
+; Non-constant addend, fold should still fire.
define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: g_nonconstant:
; CHECK: # %bb.0:
-; CHECK-NEXT: subq %rsi, %rdi
-; CHECK-NEXT: sbbq $0, %rdi
-; CHECK-NEXT: leaq (%rdi,%rdx), %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: negq %rdx
+; CHECK-NEXT: subq %rsi, %rax
+; CHECK-NEXT: sbbq %rdx, %rax
; CHECK-NEXT: retq
%ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %ov, 0
@@ -41,6 +57,77 @@ define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
ret i64 %r2
}
+; Non-constant addend, fold should still fire.
+define i32 @g_nonconstant_i32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: g_nonconstant_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: negl %edx
+; CHECK-NEXT: subl %esi, %eax
+; CHECK-NEXT: sbbl %edx, %eax
+; CHECK-NEXT: retq
+ %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %val = extractvalue { i32, i1 } %ov, 0
+ %bit = extractvalue { i32, i1 } %ov, 1
+ %ext = sext i1 %bit to i32
+ %r = add i32 %val, %ext
+ %r2 = add i32 %r, %c
+ ret i32 %r2
+}
+
+; Non-constant addend in commuted form, fold should still fire.
+define i64 @g_nonconstant_commuted(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: g_nonconstant_commuted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: negq %rdx
+; CHECK-NEXT: subq %rsi, %rax
+; CHECK-NEXT: sbbq %rdx, %rax
+; CHECK-NEXT: retq
+ %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %val = extractvalue { i64, i1 } %ov, 0
+ %bit = extractvalue { i64, i1 } %ov, 1
+ %ext = sext i1 %bit to i64
+ %r = add i64 %val, %ext
+ %r2 = add i64 %c, %r
+ ret i64 %r2
+}
+
+; Non-constant addend in commuted form, fold should still fire.
+define i32 @g_nonconstant_commuted_i32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: g_nonconstant_commuted_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: negl %edx
+; CHECK-NEXT: subl %esi, %eax
+; CHECK-NEXT: sbbl %edx, %eax
+; CHECK-NEXT: retq
+ %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %val = extractvalue { i32, i1 } %ov, 0
+ %bit = extractvalue { i32, i1 } %ov, 1
+ %ext = sext i1 %bit to i32
+ %r = add i32 %val, %ext
+ %r2 = add i32 %c, %r
+ ret i32 %r2
+}
+
+; INT_MIN should fold correctly too.
+define i32 @g_i32_int_min(i32 %a, i32 %b) {
+; CHECK-LABEL: g_i32_int_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: subl %esi, %eax
+; CHECK-NEXT: sbbl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: retq
+ %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %val = extractvalue { i32, i1 } %ov, 0
+ %bit = extractvalue { i32, i1 } %ov, 1
+ %ext = sext i1 %bit to i32
+ %r = add i32 %val, %ext
+ %r2 = add i32 %r, -2147483648
+ ret i32 %r2
+}
+
; Multiple uses of SBB result, fold should not fire
define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
; CHECK-LABEL: g_multi_use:
@@ -60,6 +147,26 @@ define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
ret i64 %r
}
+; Multiple uses of SBB result, fold should not fire
+define i32 @g_multi_use_i32(i32 %a, i32 %b, ptr %out) {
+; CHECK-LABEL: g_multi_use_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: sbbl $0, %edi
+; CHECK-NEXT: movl %edi, (%rdx)
+; CHECK-NEXT: leal 10(%rdi), %eax
+; CHECK-NEXT: retq
+ %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %val = extractvalue { i32, i1 } %ov, 0
+ %bit = extractvalue { i32, i1 } %ov, 1
+ %ext = sext i1 %bit to i32
+ %sbb = add i32 %val, %ext
+ store i32 %sbb, ptr %out
+ %r = add i32 %sbb, 10
+ ret i32 %r
+}
+
; Flags live across the low-limb constant add into the next SBB in the chain.
; Fold should not fire.
define {i64, i64} @g_flags_live(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) {
More information about the llvm-commits
mailing list