[llvm] 807cc37 - [DAGCombiner] Fold subtraction if above threshold to `umin` (#134235)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 10 00:08:12 PDT 2025


Author: Piotr Fusik
Date: 2025-04-10T09:08:08+02:00
New Revision: 807cc3791fabd47bd24d57a8e4ab7df9117b3ede

URL: https://github.com/llvm/llvm-project/commit/807cc3791fabd47bd24d57a8e4ab7df9117b3ede
DIFF: https://github.com/llvm/llvm-project/commit/807cc3791fabd47bd24d57a8e4ab7df9117b3ede.diff

LOG: [DAGCombiner] Fold subtraction if above threshold to `umin` (#134235)

Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before, on RISC-V:

    sltu    a2, a0, a1
    addi    a2, a2, -1
    and     a1, a1, a2
    subw    a0, a0, a1

Or, with Zicond:

    sltu    a2, a0, a1
    czero.nez a1, a1, a2
    subw    a0, a0, a1

After, with Zbb:

    subw    a1, a0, a1
    minu    a0, a0, a1

Only applies to unsigned comparisons.
If `x >= y`, then `x - y` is less than or equal to `x`.
Otherwise, `x - y` wraps around and is greater than `x`.
In both cases, `umin(x, x - y)` yields the desired result.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/RISCV/rv32zbb.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..9a28caa758659 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4251,6 +4251,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
       sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
     return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
 
+  // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+  // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
+  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+  if ((LK.first == TargetLoweringBase::TypeLegal ||
+       LK.first == TargetLoweringBase::TypePromoteInteger) &&
+      TLI.isOperationLegal(ISD::UMIN, LK.second)) {
+    SDValue Y;
+    if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+                                               m_SpecificCondCode(ISD::SETULT)),
+                                       m_Zero(), m_Deferred(Y)))) ||
+        sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+                                               m_SpecificCondCode(ISD::SETUGE)),
+                                       m_Deferred(Y), m_Zero()))))
+      return DAG.getNode(ISD::UMIN, DL, VT, N0,
+                         DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 8103ddcc427cd..5afc5ecb44098 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1481,15 +1481,23 @@ entry:
 }
 
 define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    zext.b a2, a1
-; CHECK-NEXT:    zext.b a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    zext.b a2, a1
+; RV32I-NEXT:    zext.b a3, a0
+; RV32I-NEXT:    sltu a2, a3, a2
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    zext.b a2, a0
+; RV32ZBB-NEXT:    sub a0, a0, a1
+; RV32ZBB-NEXT:    zext.b a0, a0
+; RV32ZBB-NEXT:    minu a0, a2, a0
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i8 %x, %y
   %select = select i1 %cmp, i8 0, i8 %y
   %sub = sub nuw i8 %x, %select
@@ -1511,12 +1519,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 ;
 ; RV32ZBB-LABEL: sub_if_uge_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    zext.h a2, a1
-; RV32ZBB-NEXT:    zext.h a3, a0
-; RV32ZBB-NEXT:    sltu a2, a3, a2
-; RV32ZBB-NEXT:    addi a2, a2, -1
-; RV32ZBB-NEXT:    and a1, a2, a1
+; RV32ZBB-NEXT:    zext.h a2, a0
 ; RV32ZBB-NEXT:    sub a0, a0, a1
+; RV32ZBB-NEXT:    zext.h a0, a0
+; RV32ZBB-NEXT:    minu a0, a2, a0
 ; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i16 %x, %y
   %select = select i1 %cmp, i16 0, i16 %y
@@ -1525,13 +1531,19 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 }
 
 define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sub a1, a0, a1
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1643,21 +1655,35 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a2, a2, a1
-; CHECK-NEXT:    sub a2, a0, a2
-; CHECK-NEXT:    bltu a0, a1, .LBB55_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    sll a0, a2, a0
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB55_2:
-; CHECK-NEXT:    li a0, 2
-; CHECK-NEXT:    sll a0, a2, a0
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a2, a2, a1
+; RV32I-NEXT:    sub a2, a0, a2
+; RV32I-NEXT:    bltu a0, a1, .LBB55_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    li a0, 4
+; RV32I-NEXT:    sll a0, a2, a0
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB55_2:
+; RV32I-NEXT:    li a0, 2
+; RV32I-NEXT:    sll a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sub a2, a0, a1
+; RV32ZBB-NEXT:    minu a2, a0, a2
+; RV32ZBB-NEXT:    bltu a0, a1, .LBB55_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    li a0, 4
+; RV32ZBB-NEXT:    sll a0, a2, a0
+; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:  .LBB55_2:
+; RV32ZBB-NEXT:    li a0, 2
+; RV32ZBB-NEXT:    sll a0, a2, a0
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1665,3 +1691,30 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
   %shl = shl i32 %sub, %select2
   ret i32 %shl
 }
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a3, a0, a1
+; RV32I-NEXT:    xori a4, a3, 1
+; RV32I-NEXT:    addi a3, a3, -1
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    sw a4, 0(a2)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sltu a3, a0, a1
+; RV32ZBB-NEXT:    sub a1, a0, a1
+; RV32ZBB-NEXT:    xori a3, a3, 1
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    sw a3, 0(a2)
+; RV32ZBB-NEXT:    ret
+  %cmp = icmp uge i32 %x, %y
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %select = select i1 %cmp, i32 %y, i32 0
+  %sub = sub nuw i32 %x, %select
+  ret i32 %sub
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 844f6796f10b6..2ae10da967754 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1682,15 +1682,23 @@ entry:
 }
 
 define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    zext.b a2, a1
-; CHECK-NEXT:    zext.b a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    zext.b a2, a1
+; RV64I-NEXT:    zext.b a3, a0
+; RV64I-NEXT:    sltu a2, a3, a2
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.b a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    zext.b a0, a0
+; RV64ZBB-NEXT:    minu a0, a2, a0
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i8 %x, %y
   %select = select i1 %cmp, i8 0, i8 %y
   %sub = sub nuw i8 %x, %select
@@ -1712,12 +1720,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 ;
 ; RV64ZBB-LABEL: sub_if_uge_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    zext.h a2, a1
-; RV64ZBB-NEXT:    zext.h a3, a0
-; RV64ZBB-NEXT:    sltu a2, a3, a2
-; RV64ZBB-NEXT:    addi a2, a2, -1
-; RV64ZBB-NEXT:    and a1, a2, a1
-; RV64ZBB-NEXT:    sub a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    minu a0, a2, a0
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i16 %x, %y
   %select = select i1 %cmp, i16 0, i16 %y
@@ -1726,15 +1732,22 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 }
 
 define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sext.w a2, a1
-; CHECK-NEXT:    sext.w a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    subw a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    sltu a2, a3, a2
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.w a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    minu a0, a2, a0
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1742,13 +1755,19 @@ define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
 }
 
 define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
-; CHECK-LABEL: sub_if_uge_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sub a1, a0, a1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i64 %x, %y
   %select = select i1 %cmp, i64 0, i64 %y
   %sub = sub nuw i64 %x, %select
@@ -1798,23 +1817,39 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sext.w a2, a1
-; CHECK-NEXT:    sext.w a3, a0
-; CHECK-NEXT:    sltu a4, a3, a2
-; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    and a1, a4, a1
-; CHECK-NEXT:    subw a0, a0, a1
-; CHECK-NEXT:    bltu a3, a2, .LBB68_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 4
-; CHECK-NEXT:    sllw a0, a0, a1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB68_2:
-; CHECK-NEXT:    li a1, 2
-; CHECK-NEXT:    sllw a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    sltu a4, a3, a2
+; RV64I-NEXT:    addi a4, a4, -1
+; RV64I-NEXT:    and a1, a4, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    bltu a3, a2, .LBB68_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    li a1, 4
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB68_2:
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.w a2, a1
+; RV64ZBB-NEXT:    sext.w a3, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    minu a0, a3, a0
+; RV64ZBB-NEXT:    bltu a3, a2, .LBB68_2
+; RV64ZBB-NEXT:  # %bb.1:
+; RV64ZBB-NEXT:    li a1, 4
+; RV64ZBB-NEXT:    sllw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+; RV64ZBB-NEXT:  .LBB68_2:
+; RV64ZBB-NEXT:    li a1, 2
+; RV64ZBB-NEXT:    sllw a0, a0, a1
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1822,3 +1857,30 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
   %shl = shl i32 %sub, %select2
   ret i32 %shl
 }
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, ptr %z) {
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a3, a0, a1
+; RV64I-NEXT:    xori a4, a3, 1
+; RV64I-NEXT:    addi a3, a3, -1
+; RV64I-NEXT:    and a1, a3, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    sw a4, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sltu a3, a0, a1
+; RV64ZBB-NEXT:    subw a1, a0, a1
+; RV64ZBB-NEXT:    xori a3, a3, 1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    sw a3, 0(a2)
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp uge i32 %x, %y
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %select = select i1 %cmp, i32 %y, i32 0
+  %sub = sub nuw i32 %x, %select
+  ret i32 %sub
+}


        


More information about the llvm-commits mailing list