[llvm] 4e675a0 - [SelectionDAG] Lowering usub.sat(a, 1) to a - (a != 0) (#170076)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 18 06:31:57 PST 2025
Author: guan jian
Date: 2025-12-18T14:31:53Z
New Revision: 4e675a0c459c49845be33f757132008f4e8e5074
URL: https://github.com/llvm/llvm-project/commit/4e675a0c459c49845be33f757132008f4e8e5074
DIFF: https://github.com/llvm/llvm-project/commit/4e675a0c459c49845be33f757132008f4e8e5074.diff
LOG: [SelectionDAG] Lowering usub.sat(a, 1) to a - (a != 0) (#170076)
I recently observed that LLVM generates the following RISC-V code for `usub.sat(a, 1)`:
```
addi a1, a0, -1
sltu a0, a0, a1
addi a0, a0, -1
and a0, a0, a1
ret
```
This can be optimized to use the `snez` instruction instead.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/and-mask-removal.ll
llvm/test/CodeGen/RISCV/usub_sat.ll
llvm/test/CodeGen/X86/combine-sub-usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0ffcf419e9419..29e13c558fca8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10895,6 +10895,18 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
+ // usub.sat(a, 1) -> sub(a, zext(a != 0))
+ if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
+ LHS = DAG.getFreeze(LHS);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
+ SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
+ Subtrahend =
+ DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
+ return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
+ }
+
// uadd.sat(a, b) -> umin(a, ~b) + b
if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 5046c0571ad2b..855fe5caf97b2 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -483,9 +483,9 @@ define i64 @pr58109(i8 signext %0) {
; CHECK-SD-LABEL: pr58109:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: add w8, w0, #1
-; CHECK-SD-NEXT: and w8, w8, #0xff
-; CHECK-SD-NEXT: subs w8, w8, #1
-; CHECK-SD-NEXT: csel w0, wzr, w8, lo
+; CHECK-SD-NEXT: ands w8, w8, #0xff
+; CHECK-SD-NEXT: cset w9, ne
+; CHECK-SD-NEXT: sub w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr58109:
diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll
index 33056682dcc79..361719b2a0671 100644
--- a/llvm/test/CodeGen/RISCV/usub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV32IZbb
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV64IZbb
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v | FileCheck %s --check-prefix=RV32I
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefix=RV64I
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV32IZbb
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV64IZbb
define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: func:
@@ -185,3 +185,167 @@ define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
ret i4 %tmp;
}
+
+define signext i32 @fun9(i32 signext %x) nounwind {
+; RV32I-LABEL: fun9:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a1, a0
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun9:
+; RV64I: # %bb.0:
+; RV64I-NEXT: snez a1, a0
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun9:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a1, 1
+; RV32IZbb-NEXT: maxu a0, a0, a1
+; RV32IZbb-NEXT: addi a0, a0, -1
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun9:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a1, 1
+; RV64IZbb-NEXT: maxu a0, a0, a1
+; RV64IZbb-NEXT: addiw a0, a0, -1
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 1)
+ ret i32 %tmp
+}
+
+define signext i32 @fun10(i32 signext %x) nounwind {
+; RV32I-LABEL: fun10:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun10:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun10:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun10:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 0)
+ ret i32 %tmp
+}
+
+define signext i32 @fun11(i32 signext %x) nounwind {
+; RV32I-LABEL: fun11:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a1, a0, 1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun11:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addiw a1, a0, 1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun11:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, 0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun11:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, 0
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 -1)
+ ret i32 %tmp
+}
+
+define <4 x i32> @fun12(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun12:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32I-NEXT: vssubu.vx v8, v8, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun12:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64I-NEXT: vssubu.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun12:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, 1
+; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun12:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, 1
+; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 1))
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @fun13(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun13:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, -1
+; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32I-NEXT: vssubu.vx v8, v8, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun13:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, -1
+; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64I-NEXT: vssubu.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun13:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, -1
+; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun13:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, -1
+; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 -1))
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @fun14(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun14:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun14:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun14:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun14:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 0))
+ ret <4 x i32> %1
+}
diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index e601c5733bd42..7ec4e062930db 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -116,9 +116,9 @@ define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
define i32 @combine_dec_i32(i32 %a0) {
; CHECK-LABEL: combine_dec_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: subl $1, %edi
-; CHECK-NEXT: cmovael %edi, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cmpl $1, %edi
+; CHECK-NEXT: adcl $-1, %eax
; CHECK-NEXT: retq
%1 = call i32 @llvm.usub.sat.i32(i32 %a0, i32 1)
ret i32 %1
More information about the llvm-commits
mailing list