[llvm] 807cc37 - [DAGCombiner] Fold subtraction if above threshold to `umin` (#134235)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 00:08:12 PDT 2025
Author: Piotr Fusik
Date: 2025-04-10T09:08:08+02:00
New Revision: 807cc3791fabd47bd24d57a8e4ab7df9117b3ede
URL: https://github.com/llvm/llvm-project/commit/807cc3791fabd47bd24d57a8e4ab7df9117b3ede
DIFF: https://github.com/llvm/llvm-project/commit/807cc3791fabd47bd24d57a8e4ab7df9117b3ede.diff
LOG: [DAGCombiner] Fold subtraction if above threshold to `umin` (#134235)
Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
        return x >= y ? x - y : x;
    }

Before, on RISC-V:

    sltu a2, a0, a1
    addi a2, a2, -1
    and a1, a1, a2
    subw a0, a0, a1

Or, with Zicond:

    sltu a2, a0, a1
    czero.nez a1, a1, a2
    subw a0, a0, a1

After, with Zbb:

    subw a1, a0, a1
    minu a0, a0, a1
This fold only applies to unsigned comparisons.
If `x >= y`, then `x - y` is less than or equal to `x`.
Otherwise, `x - y` wraps around and is greater than `x`, so `umin` selects `x`.
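As a quick illustration of the wrap-around reasoning, a minimal standalone C sketch (not part of the patch; `umin` here is a hypothetical helper, not an LLVM API):

    #include <stdint.h>
    #include <stdio.h>

    /* When x < y, the unsigned subtraction x - y wraps around and
     * yields a value greater than x, so umin(x, x - y) picks x. */
    static uint32_t umin(uint32_t a, uint32_t b) { return a < b ? a : b; }

    int main(void) {
        uint32_t x = 3, y = 5;
        uint32_t diff = x - y;                     /* wraps to 0xFFFFFFFE */
        printf("x - y = %u\n", diff);              /* 4294967294, greater than x */
        printf("umin  = %u\n", umin(x, diff));     /* 3, i.e. x, as required */
        return 0;
    }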
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..9a28caa758659 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4251,6 +4251,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
+ // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+ // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
+ auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+ if ((LK.first == TargetLoweringBase::TypeLegal ||
+ LK.first == TargetLoweringBase::TypePromoteInteger) &&
+ TLI.isOperationLegal(ISD::UMIN, LK.second)) {
+ SDValue Y;
+ if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y)))) ||
+ sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETUGE)),
+ m_Deferred(Y), m_Zero()))))
+ return DAG.getNode(ISD::UMIN, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+ }
+
return SDValue();
}
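For reference, the equivalence that both matched select forms rely on can be checked exhaustively at a narrow bit width. An illustrative C sketch (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Verifies, for all 8-bit pairs, that both select forms matched
     * above equal umin(x, x - y):
     *   (sub x, (select (ult x, y), 0, y))
     *   (sub x, (select (uge x, y), y, 0))  */
    int main(void) {
        for (unsigned x = 0; x < 256; ++x) {
            for (unsigned y = 0; y < 256; ++y) {
                int ult = (uint8_t)x < (uint8_t)y;
                uint8_t a = (uint8_t)(x - (ult ? 0 : y));    /* first form  */
                uint8_t b = (uint8_t)(x - (!ult ? y : 0));   /* second form */
                uint8_t d = (uint8_t)(x - y);
                uint8_t m = (uint8_t)x < d ? (uint8_t)x : d; /* umin(x, x-y) */
                assert(a == m && b == m);
            }
        }
        return 0;
    }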
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 8103ddcc427cd..5afc5ecb44098 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1481,15 +1481,23 @@ entry:
}
define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: zext.b a2, a1
-; CHECK-NEXT: zext.b a3, a0
-; CHECK-NEXT: sltu a2, a3, a2
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; RV32I-LABEL: sub_if_uge_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a2, a1
+; RV32I-NEXT: zext.b a3, a0
+; RV32I-NEXT: sltu a2, a3, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: zext.b a2, a0
+; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.b a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: ret
%cmp = icmp ult i8 %x, %y
%select = select i1 %cmp, i8 0, i8 %y
%sub = sub nuw i8 %x, %select
@@ -1511,12 +1519,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
;
; RV32ZBB-LABEL: sub_if_uge_i16:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: zext.h a2, a1
-; RV32ZBB-NEXT: zext.h a3, a0
-; RV32ZBB-NEXT: sltu a2, a3, a2
-; RV32ZBB-NEXT: addi a2, a2, -1
-; RV32ZBB-NEXT: and a1, a2, a1
+; RV32ZBB-NEXT: zext.h a2, a0
; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.h a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
; RV32ZBB-NEXT: ret
%cmp = icmp ult i16 %x, %y
%select = select i1 %cmp, i16 0, i16 %y
@@ -1525,13 +1531,19 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
}
define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a2, a0, a1
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; RV32I-LABEL: sub_if_uge_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a1, a0, a1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: ret
%cmp = icmp ult i32 %x, %y
%select = select i1 %cmp, i32 0, i32 %y
%sub = sub nuw i32 %x, %select
@@ -1643,21 +1655,35 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
}
define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a2, a0, a1
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a2, a2, a1
-; CHECK-NEXT: sub a2, a0, a2
-; CHECK-NEXT: bltu a0, a1, .LBB55_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: sll a0, a2, a0
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB55_2:
-; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: sll a0, a2, a0
-; CHECK-NEXT: ret
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sub a2, a0, a2
+; RV32I-NEXT: bltu a0, a1, .LBB55_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB55_2:
+; RV32I-NEXT: li a0, 2
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a2, a0, a1
+; RV32ZBB-NEXT: minu a2, a0, a2
+; RV32ZBB-NEXT: bltu a0, a1, .LBB55_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: li a0, 4
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
+; RV32ZBB-NEXT: .LBB55_2:
+; RV32ZBB-NEXT: li a0, 2
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
%cmp = icmp ult i32 %x, %y
%select = select i1 %cmp, i32 0, i32 %y
%sub = sub nuw i32 %x, %select
@@ -1665,3 +1691,30 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
%shl = shl i32 %sub, %select2
ret i32 %shl
}
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a3, a0, a1
+; RV32I-NEXT: xori a4, a3, 1
+; RV32I-NEXT: addi a3, a3, -1
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: sw a4, 0(a2)
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sltu a3, a0, a1
+; RV32ZBB-NEXT: sub a1, a0, a1
+; RV32ZBB-NEXT: xori a3, a3, 1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: sw a3, 0(a2)
+; RV32ZBB-NEXT: ret
+ %cmp = icmp uge i32 %x, %y
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, ptr %z, align 4
+ %select = select i1 %cmp, i32 %y, i32 0
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 844f6796f10b6..2ae10da967754 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1682,15 +1682,23 @@ entry:
}
define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: zext.b a2, a1
-; CHECK-NEXT: zext.b a3, a0
-; CHECK-NEXT: sltu a2, a3, a2
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: sub_if_uge_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a2, a1
+; RV64I-NEXT: zext.b a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i8:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: zext.b a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.b a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
%cmp = icmp ult i8 %x, %y
%select = select i1 %cmp, i8 0, i8 %y
%sub = sub nuw i8 %x, %select
@@ -1712,12 +1720,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
;
; RV64ZBB-LABEL: sub_if_uge_i16:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: zext.h a2, a1
-; RV64ZBB-NEXT: zext.h a3, a0
-; RV64ZBB-NEXT: sltu a2, a3, a2
-; RV64ZBB-NEXT: addi a2, a2, -1
-; RV64ZBB-NEXT: and a1, a2, a1
-; RV64ZBB-NEXT: sub a0, a0, a1
+; RV64ZBB-NEXT: zext.h a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.h a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
; RV64ZBB-NEXT: ret
%cmp = icmp ult i16 %x, %y
%select = select i1 %cmp, i16 0, i16 %y
@@ -1726,15 +1732,22 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
}
define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a2, a1
-; CHECK-NEXT: sext.w a3, a0
-; CHECK-NEXT: sltu a2, a3, a2
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: sub_if_uge_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
%cmp = icmp ult i32 %x, %y
%select = select i1 %cmp, i32 0, i32 %y
%sub = sub nuw i32 %x, %select
@@ -1742,13 +1755,19 @@ define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
}
define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
-; CHECK-LABEL: sub_if_uge_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a2, a0, a1
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: sub_if_uge_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sub a1, a0, a1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: ret
%cmp = icmp ult i64 %x, %y
%select = select i1 %cmp, i64 0, i64 %y
%sub = sub nuw i64 %x, %select
@@ -1798,23 +1817,39 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
}
define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a2, a1
-; CHECK-NEXT: sext.w a3, a0
-; CHECK-NEXT: sltu a4, a3, a2
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a1, a4, a1
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: bltu a3, a2, .LBB68_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 4
-; CHECK-NEXT: sllw a0, a0, a1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB68_2:
-; CHECK-NEXT: li a1, 2
-; CHECK-NEXT: sllw a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a4, a3, a2
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a1, a4, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: bltu a3, a2, .LBB68_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: li a1, 4
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB68_2:
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a1
+; RV64ZBB-NEXT: sext.w a3, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a3, a0
+; RV64ZBB-NEXT: bltu a3, a2, .LBB68_2
+; RV64ZBB-NEXT: # %bb.1:
+; RV64ZBB-NEXT: li a1, 4
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+; RV64ZBB-NEXT: .LBB68_2:
+; RV64ZBB-NEXT: li a1, 2
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
%cmp = icmp ult i32 %x, %y
%select = select i1 %cmp, i32 0, i32 %y
%sub = sub nuw i32 %x, %select
@@ -1822,3 +1857,30 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
%shl = shl i32 %sub, %select2
ret i32 %shl
}
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, ptr %z) {
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a3, a0, a1
+; RV64I-NEXT: xori a4, a3, 1
+; RV64I-NEXT: addi a3, a3, -1
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sw a4, 0(a2)
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sltu a3, a0, a1
+; RV64ZBB-NEXT: subw a1, a0, a1
+; RV64ZBB-NEXT: xori a3, a3, 1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: sw a3, 0(a2)
+; RV64ZBB-NEXT: ret
+ %cmp = icmp uge i32 %x, %y
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, ptr %z, align 4
+ %select = select i1 %cmp, i32 %y, i32 0
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}