[llvm] [X86] fuse constant addition after sbb (PR #184541)

Mon Mar 9 18:48:20 PDT 2026

https://github.com/Takashiidobe updated https://github.com/llvm/llvm-project/pull/184541

>From 877ed909ddf9a9ea3780d72693a009c598088ba8 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Fri, 6 Mar 2026 16:47:08 -0500
Subject: [PATCH 1/4] fuse constant addition after sbb for x86 only

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ebfd5defdc40..e57e1dbdfda3e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59647,6 +59647,22 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                        Op0.getOperand(0), Op0.getOperand(2));
   }
 
+  // Fold ADD(SBB(Y,0,W),C) -> SBB(Y,-C,W)
+  // SBB(Y,0,W) = Y - 0 - CF = Y - CF; adding C gives Y - CF + C = Y - (-C) -
+  // CF. The SBB flags output must be dead: changing the subtrahend from 0 to -C
+  // produces different EFLAGS bits.
+  if (Op0.getOpcode() == X86ISD::SBB && Op0->hasOneUse() &&
+      X86::isZeroNode(Op0.getOperand(1)) && !Op0->hasAnyUseOfValue(1)) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
+      SDLoc SBBLoc(Op0);
+      return DAG
+          .getNode(X86ISD::SBB, SBBLoc, Op0->getVTList(), Op0.getOperand(0),
+                   DAG.getConstant(-C->getAPIntValue(), SBBLoc, VT),
+                   Op0.getOperand(2))
+          .getValue(0);
+    }
+  }
+
   if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget))
     return IFMA52;
 

>From e6dbeb44fa5222171f135965dbed6509f724b9b3 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 10:28:04 -0500
Subject: [PATCH 2/4] add tests and fix up test breakage due to SBB change

---
 .../CodeGen/X86/apx/long-instruction-fixup.ll |  8 +-
 llvm/test/CodeGen/X86/apx/sbb.ll              | 54 +++++-------
 llvm/test/CodeGen/X86/sbb-add-constant.ll     | 84 +++++++++++++++++++
 llvm/test/CodeGen/X86/select_const.ll         |  4 +-
 4 files changed, 110 insertions(+), 40 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/sbb-add-constant.ll

diff --git a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
index 30c485836797f..839ec45051367 100644
--- a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
+++ b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
@@ -169,8 +169,8 @@ define i32 @sbb32mi_GS(i32 %x, i32 %y) {
 ; CHECK-LABEL: sbb32mi_GS:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl %edi, %esi
-; CHECK-NEXT:    sbbl $0, %gs:255, %eax
-; CHECK-NEXT:    addl $-123456, %eax # imm = 0xFFFE1DC0
+; CHECK-NEXT:    movl %gs:255, %eax
+; CHECK-NEXT:    sbbl $123456, %eax # imm = 0x1E240
 ; CHECK-NEXT:    retq
 entry:
   %a= inttoptr i32 255 to ptr addrspace(256)
@@ -186,8 +186,8 @@ define i64 @sbb64mi_FS(i64 %x, i64 %y) {
 ; CHECK-LABEL: sbb64mi_FS:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpq %rdi, %rsi
-; CHECK-NEXT:    sbbq $0, %fs:255, %rax
-; CHECK-NEXT:    addq $-123456, %rax # imm = 0xFFFE1DC0
+; CHECK-NEXT:    movq %fs:255, %rax
+; CHECK-NEXT:    sbbq $123456, %rax # imm = 0x1E240
 ; CHECK-NEXT:    retq
 entry:
   %a= inttoptr i64 255 to ptr addrspace(257)
diff --git a/llvm/test/CodeGen/X86/apx/sbb.ll b/llvm/test/CodeGen/X86/apx/sbb.ll
index a67419bbd5db7..0b009c6cbcdb5 100644
--- a/llvm/test/CodeGen/X86/apx/sbb.ll
+++ b/llvm/test/CodeGen/X86/apx/sbb.ll
@@ -113,8 +113,7 @@ define i16 @sbb16ri8(i16 %a, i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: sbb16ri8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT:    sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbw $123, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i16 %a, 123
   %k = icmp ugt i16 %x, %y
@@ -127,8 +126,7 @@ define i32 @sbb32ri8(i32 %a, i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: sbb32ri8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT:    sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i32 %a, 123
   %k = icmp ugt i32 %x, %y
@@ -141,8 +139,7 @@ define i64 @sbb64ri8(i64 %a, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: sbb64ri8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT:    sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i64 %a, 123
   %k = icmp ugt i64 %x, %y
@@ -155,8 +152,7 @@ define i8 @sbb8ri(i8 %a, i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: sbb8ri:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpb %sil, %dl # encoding: [0x40,0x38,0xf2]
-; CHECK-NEXT:    sbbb $0, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x00]
-; CHECK-NEXT:    addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85]
+; CHECK-NEXT:    sbbb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i8 %a, 123
   %k = icmp ugt i8 %x, %y
@@ -169,9 +165,8 @@ define i16 @sbb16ri(i16 %a, i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: sbb16ri:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT:    sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb]
-; CHECK-NEXT:    # imm = 0xFB2E
+; CHECK-NEXT:    sbbw $1234, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0xdf,0xd2,0x04]
+; CHECK-NEXT:    # imm = 0x4D2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i16 %a, 1234
   %k = icmp ugt i16 %x, %y
@@ -184,9 +179,8 @@ define i32 @sbb32ri(i32 %a, i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: sbb32ri:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT:    sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT:    # imm = 0xFFFE1DC0
+; CHECK-NEXT:    sbbl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT:    # imm = 0x1E240
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i32 %a, 123456
   %k = icmp ugt i32 %x, %y
@@ -199,9 +193,8 @@ define i64 @sbb64ri(i64 %a, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: sbb64ri:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT:    sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00]
-; CHECK-NEXT:    addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT:    # imm = 0xFFFE1DC0
+; CHECK-NEXT:    sbbq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT:    # imm = 0x1E240
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %s = sub i64 %a, 123456
   %k = icmp ugt i64 %x, %y
@@ -270,8 +263,7 @@ define i16 @sbb16mi8(ptr %ptr, i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: sbb16mi8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT:    sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbw $123, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i16, ptr %ptr
   %s = sub i16 %a, 123
@@ -285,8 +277,7 @@ define i32 @sbb32mi8(ptr %ptr, i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: sbb32mi8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT:    sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i32, ptr %ptr
   %s = sub i32 %a, 123
@@ -300,8 +291,7 @@ define i64 @sbb64mi8(ptr %ptr, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: sbb64mi8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT:    sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85]
+; CHECK-NEXT:    sbbq $123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i64, ptr %ptr
   %s = sub i64 %a, 123
@@ -315,8 +305,7 @@ define i8 @sbb8mi(ptr %ptr, i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: sbb8mi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpb %sil, %dl # encoding: [0x40,0x38,0xf2]
-; CHECK-NEXT:    sbbb $0, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x00]
-; CHECK-NEXT:    addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85]
+; CHECK-NEXT:    sbbb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i8, ptr %ptr
   %s = sub i8 %a, 123
@@ -330,9 +319,8 @@ define i16 @sbb16mi(ptr %ptr, i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: sbb16mi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpw %si, %dx # encoding: [0x66,0x39,0xf2]
-; CHECK-NEXT:    sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb]
-; CHECK-NEXT:    # imm = 0xFB2E
+; CHECK-NEXT:    sbbw $1234, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0x1f,0xd2,0x04]
+; CHECK-NEXT:    # imm = 0x4D2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i16, ptr %ptr
   %s = sub i16 %a, 1234
@@ -346,9 +334,8 @@ define i32 @sbb32mi(ptr %ptr, i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: sbb32mi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpl %esi, %edx # encoding: [0x39,0xf2]
-; CHECK-NEXT:    sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT:    # imm = 0xFFFE1DC0
+; CHECK-NEXT:    sbbl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT:    # imm = 0x1E240
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i32, ptr %ptr
   %s = sub i32 %a, 123456
@@ -362,9 +349,8 @@ define i64 @sbb64mi(ptr %ptr, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: sbb64mi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2]
-; CHECK-NEXT:    sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00]
-; CHECK-NEXT:    addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT:    # imm = 0xFFFE1DC0
+; CHECK-NEXT:    sbbq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT:    # imm = 0x1E240
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %a = load i64, ptr %ptr
   %s = sub i64 %a, 123456
diff --git a/llvm/test/CodeGen/X86/sbb-add-constant.ll b/llvm/test/CodeGen/X86/sbb-add-constant.ll
new file mode 100644
index 0000000000000..e6e95029fa3b2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sbb-add-constant.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+;
+; Verify that ADD(SBB(Y,0,flags),C) folds to SBB(Y,-C,flags).
+; SBB(Y,0) = Y - CF; adding C gives Y - CF + C = Y - (-C) - CF = SBB(Y,-C).
+;
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
+declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
+
+; Fold should fire because all conditions are met
+define i64 @g_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: g_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    sbbq $-10, %rax
+; CHECK-NEXT:    retq
+  %ov  = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %val = extractvalue { i64, i1 } %ov, 0
+  %bit = extractvalue { i64, i1 } %ov, 1
+  %ext = sext i1 %bit to i64
+  %r   = add i64 %val, %ext
+  %r2  = add i64 %r, 10
+  ret i64 %r2
+}
+
+; Non-constant addend, fold should not fire
+define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: g_nonconstant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq %rsi, %rdi
+; CHECK-NEXT:    sbbq $0, %rdi
+; CHECK-NEXT:    leaq (%rdi,%rdx), %rax
+; CHECK-NEXT:    retq
+  %ov  = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %val = extractvalue { i64, i1 } %ov, 0
+  %bit = extractvalue { i64, i1 } %ov, 1
+  %ext = sext i1 %bit to i64
+  %r   = add i64 %val, %ext
+  %r2  = add i64 %r, %c
+  ret i64 %r2
+}
+
+; Multiple uses of SBB result, fold should not fire
+define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
+; CHECK-LABEL: g_multi_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq %rsi, %rdi
+; CHECK-NEXT:    sbbq $0, %rdi
+; CHECK-NEXT:    movq %rdi, (%rdx)
+; CHECK-NEXT:    leaq 10(%rdi), %rax
+; CHECK-NEXT:    retq
+  %ov  = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %val = extractvalue { i64, i1 } %ov, 0
+  %bit = extractvalue { i64, i1 } %ov, 1
+  %ext = sext i1 %bit to i64
+  %sbb = add i64 %val, %ext
+  store i64 %sbb, ptr %out
+  %r   = add i64 %sbb, 10
+  ret i64 %r
+}
+
+; Flags live across the low-limb constant add into the next SBB in the chain.
+; Fold should not fire.
+define {i64, i64} @g_flags_live(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) {
+; CHECK-LABEL: g_flags_live:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq %rdx, %rdi
+; CHECK-NEXT:    leaq 10(%rdi), %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %lo = call { i8, i64 } @llvm.x86.subborrow.64(i8 0, i64 %a_lo, i64 %b_lo)
+  %lo_b = extractvalue { i8, i64 } %lo, 0
+  %lo_val = extractvalue { i8, i64 } %lo, 1
+  %lo_plus = add i64 %lo_val, 10
+
+  %hi = call { i8, i64 } @llvm.x86.subborrow.64(i8 %lo_b, i64 %a_hi, i64 %b_hi)
+  %hi_val = extractvalue { i8, i64 } %hi, 1
+
+  %ret = insertvalue {i64, i64} poison, i64 %lo_plus, 0
+  %ret2 = insertvalue {i64, i64} %ret, i64 %hi_val, 1
+  ret {i64, i64} %ret2
+}
diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll
index 35f4655dd6d7c..a7da07f1ae5df 100644
--- a/llvm/test/CodeGen/X86/select_const.ll
+++ b/llvm/test/CodeGen/X86/select_const.ll
@@ -439,9 +439,9 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
 ;
 ; X64-LABEL: sel_1_2:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    cmpq $42, %rdi
-; X64-NEXT:    sbbq $0, %rsi
-; X64-NEXT:    leaq 2(%rsi), %rax
+; X64-NEXT:    sbbq $-2, %rax
 ; X64-NEXT:    retq
   %cmp = icmp ult i64 %x, 42
   %sel = select i1 %cmp, i64 1, i64 2

>From 56bbcbc1a782c6632944787a7c5b57a2a1ad1345 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Mon, 9 Mar 2026 21:47:37 -0400
Subject: [PATCH 3/4] allow the x86 sbb fold to also take a non-constant
 addend, swapping the add operands when the SBB is the second operand

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e57e1dbdfda3e..6eb6410960af7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59651,16 +59651,17 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   // SBB(Y,0,W) = Y - 0 - CF = Y - CF; adding C gives Y - CF + C = Y - (-C) -
   // CF. The SBB flags output must be dead: changing the subtrahend from 0 to -C
   // produces different EFLAGS bits.
-  if (Op0.getOpcode() == X86ISD::SBB && Op0->hasOneUse() &&
-      X86::isZeroNode(Op0.getOperand(1)) && !Op0->hasAnyUseOfValue(1)) {
-    if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
-      SDLoc SBBLoc(Op0);
-      return DAG
-          .getNode(X86ISD::SBB, SBBLoc, Op0->getVTList(), Op0.getOperand(0),
-                   DAG.getConstant(-C->getAPIntValue(), SBBLoc, VT),
-                   Op0.getOperand(2))
-          .getValue(0);
-    }
+  SDValue SBB = Op0;
+  SDValue C = Op1;
+  if (SBB.getOpcode() != X86ISD::SBB)
+    std::swap(SBB, C);
+  if (SBB.getOpcode() == X86ISD::SBB && SBB->hasOneUse() &&
+      X86::isZeroNode(SBB.getOperand(1)) && !SBB->hasAnyUseOfValue(1)) {
+    SDLoc SBBLoc(SBB);
+    return DAG
+        .getNode(X86ISD::SBB, SBBLoc, SBB->getVTList(), SBB.getOperand(0),
+                 DAG.getNegative(C, SBBLoc, VT), SBB.getOperand(2))
+        .getValue(0);
   }
 
   if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget))

>From b5b37dfada73e7db3e88839fd9f1c693da181513 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Mon, 9 Mar 2026 21:47:50 -0400
Subject: [PATCH 4/4] add tests for sbb fold

---
 llvm/test/CodeGen/X86/sbb-add-constant.ll | 119 ++++++++++++++++++++--
 1 file changed, 113 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/sbb-add-constant.ll b/llvm/test/CodeGen/X86/sbb-add-constant.ll
index e6e95029fa3b2..5a4f547ca5894 100644
--- a/llvm/test/CodeGen/X86/sbb-add-constant.ll
+++ b/llvm/test/CodeGen/X86/sbb-add-constant.ll
@@ -4,8 +4,6 @@
 ; Verify that ADD(SBB(Y,0,flags),C) folds to SBB(Y,-C,flags).
 ; SBB(Y,0) = Y - CF; adding C gives Y - CF + C = Y - (-C) - CF = SBB(Y,-C).
 ;
-declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
-declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
 
 ; Fold should fire because all conditions are met
 define i64 @g_i64(i64 %a, i64 %b) {
@@ -24,13 +22,31 @@ define i64 @g_i64(i64 %a, i64 %b) {
   ret i64 %r2
 }
 
-; Non-constant addend, fold should not fire
+; Fold should fire because all conditions are met
+define i32 @g_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: g_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    sbbl $-10, %eax
+; CHECK-NEXT:    retq
+  %ov  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %ov, 0
+  %bit = extractvalue { i32, i1 } %ov, 1
+  %ext = sext i1 %bit to i32
+  %r   = add i32 %val, %ext
+  %r2  = add i32 %r, 10
+  ret i32 %r2
+}
+
+; Non-constant addend, fold should still fire.
 define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: g_nonconstant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq %rsi, %rdi
-; CHECK-NEXT:    sbbq $0, %rdi
-; CHECK-NEXT:    leaq (%rdi,%rdx), %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    negq %rdx
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    sbbq %rdx, %rax
 ; CHECK-NEXT:    retq
   %ov  = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
   %val = extractvalue { i64, i1 } %ov, 0
@@ -41,6 +57,77 @@ define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) {
   ret i64 %r2
 }
 
+; Non-constant addend, fold should still fire.
+define i32 @g_nonconstant_i32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: g_nonconstant_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %edx
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    sbbl %edx, %eax
+; CHECK-NEXT:    retq
+  %ov  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %ov, 0
+  %bit = extractvalue { i32, i1 } %ov, 1
+  %ext = sext i1 %bit to i32
+  %r   = add i32 %val, %ext
+  %r2  = add i32 %r, %c
+  ret i32 %r2
+}
+
+; Non-constant addend in commuted form, fold should still fire.
+define i64 @g_nonconstant_commuted(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: g_nonconstant_commuted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    negq %rdx
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    sbbq %rdx, %rax
+; CHECK-NEXT:    retq
+  %ov  = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %val = extractvalue { i64, i1 } %ov, 0
+  %bit = extractvalue { i64, i1 } %ov, 1
+  %ext = sext i1 %bit to i64
+  %r   = add i64 %val, %ext
+  %r2  = add i64 %c, %r
+  ret i64 %r2
+}
+
+; Non-constant addend in commuted form, fold should still fire.
+define i32 @g_nonconstant_commuted_i32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: g_nonconstant_commuted_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %edx
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    sbbl %edx, %eax
+; CHECK-NEXT:    retq
+  %ov  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %ov, 0
+  %bit = extractvalue { i32, i1 } %ov, 1
+  %ext = sext i1 %bit to i32
+  %r   = add i32 %val, %ext
+  %r2  = add i32 %c, %r
+  ret i32 %r2
+}
+
+; INT_MIN addend: negating it wraps back to INT_MIN, and the fold must still fire.
+define i32 @g_i32_int_min(i32 %a, i32 %b) {
+; CHECK-LABEL: g_i32_int_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    sbbl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    retq
+  %ov  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %ov, 0
+  %bit = extractvalue { i32, i1 } %ov, 1
+  %ext = sext i1 %bit to i32
+  %r   = add i32 %val, %ext
+  %r2  = add i32 %r, -2147483648
+  ret i32 %r2
+}
+
 ; Multiple uses of SBB result, fold should not fire
 define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
 ; CHECK-LABEL: g_multi_use:
@@ -60,6 +147,26 @@ define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) {
   ret i64 %r
 }
 
+; Multiple uses of SBB result, fold should not fire
+define i32 @g_multi_use_i32(i32 %a, i32 %b, ptr %out) {
+; CHECK-LABEL: g_multi_use_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    subl %esi, %edi
+; CHECK-NEXT:    sbbl $0, %edi
+; CHECK-NEXT:    movl %edi, (%rdx)
+; CHECK-NEXT:    leal 10(%rdi), %eax
+; CHECK-NEXT:    retq
+  %ov  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %ov, 0
+  %bit = extractvalue { i32, i1 } %ov, 1
+  %ext = sext i1 %bit to i32
+  %sbb = add i32 %val, %ext
+  store i32 %sbb, ptr %out
+  %r   = add i32 %sbb, 10
+  ret i32 %r
+}
+
 ; Flags live across the low-limb constant add into the next SBB in the chain.
 ; Fold should not fire.
 define {i64, i64} @g_flags_live(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) {