[llvm] 4e675a0 - [SelectionDAG] Lowering usub.sat(a, 1) to a - (a != 0) (#170076)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 18 06:31:57 PST 2025
Author: guan jian
Date: 2025-12-18T14:31:53Z
New Revision: 4e675a0c459c49845be33f757132008f4e8e5074
URL: https://github.com/llvm/llvm-project/commit/4e675a0c459c49845be33f757132008f4e8e5074
DIFF: https://github.com/llvm/llvm-project/commit/4e675a0c459c49845be33f757132008f4e8e5074.diff
LOG: [SelectionDAG] Lowering usub.sat(a, 1) to a - (a != 0) (#170076)
I recently observed that LLVM generates the following RISC-V code for `usub.sat(a, 1)`:
```
addi a1, a0, -1
sltu a0, a0, a1
addi a0, a0, -1
and a0, a0, a1
ret
```
This can be optimized to use the `snez` instruction instead.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/and-mask-removal.ll
llvm/test/CodeGen/RISCV/usub_sat.ll
llvm/test/CodeGen/X86/combine-sub-usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0ffcf419e9419..29e13c558fca8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10895,6 +10895,18 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
+ // usub.sat(a, 1) -> sub(a, zext(a != 0))
+ if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
+ LHS = DAG.getFreeze(LHS);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
+ SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
+ Subtrahend =
+ DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
+ return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
+ }
+
// uadd.sat(a, b) -> umin(a, ~b) + b
if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 5046c0571ad2b..855fe5caf97b2 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -483,9 +483,9 @@ define i64 @pr58109(i8 signext %0) {
; CHECK-SD-LABEL: pr58109:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: add w8, w0, #1
-; CHECK-SD-NEXT: and w8, w8, #0xff
-; CHECK-SD-NEXT: subs w8, w8, #1
-; CHECK-SD-NEXT: csel w0, wzr, w8, lo
+; CHECK-SD-NEXT: ands w8, w8, #0xff
+; CHECK-SD-NEXT: cset w9, ne
+; CHECK-SD-NEXT: sub w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr58109:
diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll
index 33056682dcc79..361719b2a0671 100644
--- a/llvm/test/CodeGen/RISCV/usub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV32IZbb
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV64IZbb
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v | FileCheck %s --check-prefix=RV32I
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefix=RV64I
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV32IZbb
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV64IZbb
define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: func:
@@ -185,3 +185,167 @@ define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
ret i4 %tmp;
}
+
+define signext i32 @fun9(i32 signext %x) nounwind {
+; RV32I-LABEL: fun9:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a1, a0
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun9:
+; RV64I: # %bb.0:
+; RV64I-NEXT: snez a1, a0
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun9:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a1, 1
+; RV32IZbb-NEXT: maxu a0, a0, a1
+; RV32IZbb-NEXT: addi a0, a0, -1
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun9:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a1, 1
+; RV64IZbb-NEXT: maxu a0, a0, a1
+; RV64IZbb-NEXT: addiw a0, a0, -1
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 1)
+ ret i32 %tmp
+}
+
+define signext i32 @fun10(i32 signext %x) nounwind {
+; RV32I-LABEL: fun10:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun10:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun10:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun10:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 0)
+ ret i32 %tmp
+}
+
+define signext i32 @fun11(i32 signext %x) nounwind {
+; RV32I-LABEL: fun11:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a1, a0, 1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun11:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addiw a1, a0, 1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun11:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, 0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun11:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, 0
+; RV64IZbb-NEXT: ret
+ %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 -1)
+ ret i32 %tmp
+}
+
+define <4 x i32> @fun12(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun12:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32I-NEXT: vssubu.vx v8, v8, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun12:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64I-NEXT: vssubu.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun12:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, 1
+; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun12:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, 1
+; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 1))
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @fun13(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun13:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, -1
+; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32I-NEXT: vssubu.vx v8, v8, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun13:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, -1
+; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64I-NEXT: vssubu.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun13:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: li a0, -1
+; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun13:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: li a0, -1
+; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 -1))
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @fun14(<4 x i32> %a0) nounwind {
+; RV32I-LABEL: fun14:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fun14:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32IZbb-LABEL: fun14:
+; RV32IZbb: # %bb.0:
+; RV32IZbb-NEXT: ret
+;
+; RV64IZbb-LABEL: fun14:
+; RV64IZbb: # %bb.0:
+; RV64IZbb-NEXT: ret
+ %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 0))
+ ret <4 x i32> %1
+}
diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index e601c5733bd42..7ec4e062930db 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -116,9 +116,9 @@ define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
define i32 @combine_dec_i32(i32 %a0) {
; CHECK-LABEL: combine_dec_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: subl $1, %edi
-; CHECK-NEXT: cmovael %edi, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cmpl $1, %edi
+; CHECK-NEXT: adcl $-1, %eax
; CHECK-NEXT: retq
%1 = call i32 @llvm.usub.sat.i32(i32 %a0, i32 1)
ret i32 %1
More information about the llvm-commits
mailing list