[llvm] [DAGCombiner] Fold subtraction if above threshold to `umin` (PR #134235)

Piotr Fusik via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 9 22:50:24 PDT 2025


https://github.com/pfusik updated https://github.com/llvm/llvm-project/pull/134235

>From d67978a2e08e1d7de5991054900c2070b73a60f7 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Thu, 3 Apr 2025 13:14:40 +0200
Subject: [PATCH 1/5] [RISCV] Fold subtraction if above threshold to Zbb `minu`

Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before:

    sltu    a2, a0, a1
    addi    a2, a2, -1
    and     a1, a1, a2
    subw    a0, a0, a1

Or, with Zicond:

    sltu    a2, a0, a1
    czero.nez       a1, a1, a2
    subw    a0, a0, a1

After:

    subw    a1, a0, a1
    minu    a0, a0, a1

Only applies to unsigned comparisons.
If `x >= y` then `x - y` is less than or equal to `x`.
Otherwise, `x - y` wraps around and is greater than `x`.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  13 +
 llvm/test/CodeGen/RISCV/rv32zbb.ll          | 326 +++++++++++++-------
 llvm/test/CodeGen/RISCV/rv64zbb.ll          | 185 +++++++----
 3 files changed, 349 insertions(+), 175 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c79b3f0b76390..8002f7736ad30 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14917,6 +14917,19 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
     return V;
 
+  if (Subtarget.hasStdExtZbb()) {
+    // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+    using namespace llvm::SDPatternMatch;
+    SDValue Y;
+    if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+                                               m_SpecificCondCode(ISD::SETULT)),
+                                       m_Zero(), m_Deferred(Y))))) {
+      SDLoc DL(N);
+      return DAG.getNode(ISD::UMIN, DL, VT, N0,
+                         DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+    }
+  }
+
   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
   //      (select lhs, rhs, cc, x, (sub x, y))
   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 8103ddcc427cd..50b198443b3a8 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1481,15 +1481,23 @@ entry:
 }
 
 define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    zext.b a2, a1
-; CHECK-NEXT:    zext.b a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    zext.b a2, a1
+; RV32I-NEXT:    zext.b a3, a0
+; RV32I-NEXT:    sltu a2, a3, a2
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    zext.b a2, a0
+; RV32ZBB-NEXT:    sub a0, a0, a1
+; RV32ZBB-NEXT:    zext.b a0, a0
+; RV32ZBB-NEXT:    minu a0, a2, a0
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i8 %x, %y
   %select = select i1 %cmp, i8 0, i8 %y
   %sub = sub nuw i8 %x, %select
@@ -1511,12 +1519,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 ;
 ; RV32ZBB-LABEL: sub_if_uge_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    zext.h a2, a1
-; RV32ZBB-NEXT:    zext.h a3, a0
-; RV32ZBB-NEXT:    sltu a2, a3, a2
-; RV32ZBB-NEXT:    addi a2, a2, -1
-; RV32ZBB-NEXT:    and a1, a2, a1
+; RV32ZBB-NEXT:    zext.h a2, a0
 ; RV32ZBB-NEXT:    sub a0, a0, a1
+; RV32ZBB-NEXT:    zext.h a0, a0
+; RV32ZBB-NEXT:    minu a0, a2, a0
 ; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i16 %x, %y
   %select = select i1 %cmp, i16 0, i16 %y
@@ -1525,13 +1531,19 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 }
 
 define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sub a1, a0, a1
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1539,23 +1551,43 @@ define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
 }
 
 define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
-; CHECK-LABEL: sub_if_uge_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    beq a1, a3, .LBB52_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    sltu a4, a1, a3
-; CHECK-NEXT:    j .LBB52_3
-; CHECK-NEXT:  .LBB52_2:
-; CHECK-NEXT:    sltu a4, a0, a2
-; CHECK-NEXT:  .LBB52_3:
-; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    and a3, a4, a3
-; CHECK-NEXT:    and a2, a4, a2
-; CHECK-NEXT:    sltu a4, a0, a2
-; CHECK-NEXT:    sub a1, a1, a3
-; CHECK-NEXT:    sub a1, a1, a4
-; CHECK-NEXT:    sub a0, a0, a2
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beq a1, a3, .LBB52_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sltu a4, a1, a3
+; RV32I-NEXT:    j .LBB52_3
+; RV32I-NEXT:  .LBB52_2:
+; RV32I-NEXT:    sltu a4, a0, a2
+; RV32I-NEXT:  .LBB52_3:
+; RV32I-NEXT:    addi a4, a4, -1
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    sltu a4, a0, a2
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    sub a1, a1, a4
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sltu a4, a0, a2
+; RV32ZBB-NEXT:    sub a3, a1, a3
+; RV32ZBB-NEXT:    sub a3, a3, a4
+; RV32ZBB-NEXT:    sub a2, a0, a2
+; RV32ZBB-NEXT:    beq a1, a3, .LBB52_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sltu a4, a1, a3
+; RV32ZBB-NEXT:    beqz a4, .LBB52_3
+; RV32ZBB-NEXT:    j .LBB52_4
+; RV32ZBB-NEXT:  .LBB52_2:
+; RV32ZBB-NEXT:    sltu a4, a0, a2
+; RV32ZBB-NEXT:    bnez a4, .LBB52_4
+; RV32ZBB-NEXT:  .LBB52_3:
+; RV32ZBB-NEXT:    mv a0, a2
+; RV32ZBB-NEXT:    mv a1, a3
+; RV32ZBB-NEXT:  .LBB52_4:
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i64 %x, %y
   %select = select i1 %cmp, i64 0, i64 %y
   %sub = sub nuw i64 %x, %select
@@ -1563,63 +1595,123 @@ define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
 }
 
 define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
-; CHECK-LABEL: sub_if_uge_i128:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lw a7, 4(a2)
-; CHECK-NEXT:    lw a6, 8(a2)
-; CHECK-NEXT:    lw t0, 12(a2)
-; CHECK-NEXT:    lw a4, 12(a1)
-; CHECK-NEXT:    lw a3, 4(a1)
-; CHECK-NEXT:    lw a5, 8(a1)
-; CHECK-NEXT:    beq a4, t0, .LBB53_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    sltu t1, a4, t0
-; CHECK-NEXT:    j .LBB53_3
-; CHECK-NEXT:  .LBB53_2:
-; CHECK-NEXT:    sltu t1, a5, a6
-; CHECK-NEXT:  .LBB53_3:
-; CHECK-NEXT:    lw a2, 0(a2)
-; CHECK-NEXT:    lw a1, 0(a1)
-; CHECK-NEXT:    beq a3, a7, .LBB53_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    sltu t2, a3, a7
-; CHECK-NEXT:    j .LBB53_6
-; CHECK-NEXT:  .LBB53_5:
-; CHECK-NEXT:    sltu t2, a1, a2
-; CHECK-NEXT:  .LBB53_6:
-; CHECK-NEXT:    xor t3, a4, t0
-; CHECK-NEXT:    xor t4, a5, a6
-; CHECK-NEXT:    or t3, t4, t3
-; CHECK-NEXT:    beqz t3, .LBB53_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mv t2, t1
-; CHECK-NEXT:  .LBB53_8:
-; CHECK-NEXT:    addi t2, t2, -1
-; CHECK-NEXT:    and t1, t2, t0
-; CHECK-NEXT:    and t0, t2, a2
-; CHECK-NEXT:    and a7, t2, a7
-; CHECK-NEXT:    sltu a2, a1, t0
-; CHECK-NEXT:    and t2, t2, a6
-; CHECK-NEXT:    mv a6, a2
-; CHECK-NEXT:    beq a3, a7, .LBB53_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    sltu a6, a3, a7
-; CHECK-NEXT:  .LBB53_10:
-; CHECK-NEXT:    sub t3, a5, t2
-; CHECK-NEXT:    sltu a5, a5, t2
-; CHECK-NEXT:    sub a4, a4, t1
-; CHECK-NEXT:    sub a3, a3, a7
-; CHECK-NEXT:    sub a1, a1, t0
-; CHECK-NEXT:    sltu a7, t3, a6
-; CHECK-NEXT:    sub a4, a4, a5
-; CHECK-NEXT:    sub a5, t3, a6
-; CHECK-NEXT:    sub a3, a3, a2
-; CHECK-NEXT:    sub a2, a4, a7
-; CHECK-NEXT:    sw a1, 0(a0)
-; CHECK-NEXT:    sw a3, 4(a0)
-; CHECK-NEXT:    sw a5, 8(a0)
-; CHECK-NEXT:    sw a2, 12(a0)
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_i128:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lw a7, 4(a2)
+; RV32I-NEXT:    lw a6, 8(a2)
+; RV32I-NEXT:    lw t0, 12(a2)
+; RV32I-NEXT:    lw a4, 12(a1)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    beq a4, t0, .LBB53_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sltu t1, a4, t0
+; RV32I-NEXT:    j .LBB53_3
+; RV32I-NEXT:  .LBB53_2:
+; RV32I-NEXT:    sltu t1, a5, a6
+; RV32I-NEXT:  .LBB53_3:
+; RV32I-NEXT:    lw a2, 0(a2)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    beq a3, a7, .LBB53_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    sltu t2, a3, a7
+; RV32I-NEXT:    j .LBB53_6
+; RV32I-NEXT:  .LBB53_5:
+; RV32I-NEXT:    sltu t2, a1, a2
+; RV32I-NEXT:  .LBB53_6:
+; RV32I-NEXT:    xor t3, a4, t0
+; RV32I-NEXT:    xor t4, a5, a6
+; RV32I-NEXT:    or t3, t4, t3
+; RV32I-NEXT:    beqz t3, .LBB53_8
+; RV32I-NEXT:  # %bb.7:
+; RV32I-NEXT:    mv t2, t1
+; RV32I-NEXT:  .LBB53_8:
+; RV32I-NEXT:    addi t2, t2, -1
+; RV32I-NEXT:    and t1, t2, t0
+; RV32I-NEXT:    and t0, t2, a2
+; RV32I-NEXT:    and a7, t2, a7
+; RV32I-NEXT:    sltu a2, a1, t0
+; RV32I-NEXT:    and t2, t2, a6
+; RV32I-NEXT:    mv a6, a2
+; RV32I-NEXT:    beq a3, a7, .LBB53_10
+; RV32I-NEXT:  # %bb.9:
+; RV32I-NEXT:    sltu a6, a3, a7
+; RV32I-NEXT:  .LBB53_10:
+; RV32I-NEXT:    sub t3, a5, t2
+; RV32I-NEXT:    sltu a5, a5, t2
+; RV32I-NEXT:    sub a4, a4, t1
+; RV32I-NEXT:    sub a3, a3, a7
+; RV32I-NEXT:    sub a1, a1, t0
+; RV32I-NEXT:    sltu a7, t3, a6
+; RV32I-NEXT:    sub a4, a4, a5
+; RV32I-NEXT:    sub a5, t3, a6
+; RV32I-NEXT:    sub a3, a3, a2
+; RV32I-NEXT:    sub a2, a4, a7
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a5, 8(a0)
+; RV32I-NEXT:    sw a2, 12(a0)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i128:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lw a7, 0(a2)
+; RV32ZBB-NEXT:    lw t0, 4(a2)
+; RV32ZBB-NEXT:    lw a5, 8(a2)
+; RV32ZBB-NEXT:    lw a6, 12(a2)
+; RV32ZBB-NEXT:    lw a2, 8(a1)
+; RV32ZBB-NEXT:    lw a3, 12(a1)
+; RV32ZBB-NEXT:    lw a4, 0(a1)
+; RV32ZBB-NEXT:    lw a1, 4(a1)
+; RV32ZBB-NEXT:    sltu t1, a2, a5
+; RV32ZBB-NEXT:    sub a6, a3, a6
+; RV32ZBB-NEXT:    sltu t2, a4, a7
+; RV32ZBB-NEXT:    sub a6, a6, t1
+; RV32ZBB-NEXT:    mv t1, t2
+; RV32ZBB-NEXT:    beq a1, t0, .LBB53_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sltu t1, a1, t0
+; RV32ZBB-NEXT:  .LBB53_2:
+; RV32ZBB-NEXT:    sub t3, a2, a5
+; RV32ZBB-NEXT:    sltu a5, t3, t1
+; RV32ZBB-NEXT:    sub a5, a6, a5
+; RV32ZBB-NEXT:    sub a6, t3, t1
+; RV32ZBB-NEXT:    beq a3, a5, .LBB53_4
+; RV32ZBB-NEXT:  # %bb.3:
+; RV32ZBB-NEXT:    sltu t1, a3, a5
+; RV32ZBB-NEXT:    j .LBB53_5
+; RV32ZBB-NEXT:  .LBB53_4:
+; RV32ZBB-NEXT:    sltu t1, a2, a6
+; RV32ZBB-NEXT:  .LBB53_5:
+; RV32ZBB-NEXT:    sub t0, a1, t0
+; RV32ZBB-NEXT:    sub t0, t0, t2
+; RV32ZBB-NEXT:    sub a7, a4, a7
+; RV32ZBB-NEXT:    beq a1, t0, .LBB53_7
+; RV32ZBB-NEXT:  # %bb.6:
+; RV32ZBB-NEXT:    sltu t2, a1, t0
+; RV32ZBB-NEXT:    j .LBB53_8
+; RV32ZBB-NEXT:  .LBB53_7:
+; RV32ZBB-NEXT:    sltu t2, a4, a7
+; RV32ZBB-NEXT:  .LBB53_8:
+; RV32ZBB-NEXT:    xor t3, a3, a5
+; RV32ZBB-NEXT:    xor t4, a2, a6
+; RV32ZBB-NEXT:    or t3, t4, t3
+; RV32ZBB-NEXT:    beqz t3, .LBB53_10
+; RV32ZBB-NEXT:  # %bb.9:
+; RV32ZBB-NEXT:    mv t2, t1
+; RV32ZBB-NEXT:  .LBB53_10:
+; RV32ZBB-NEXT:    bnez t2, .LBB53_12
+; RV32ZBB-NEXT:  # %bb.11:
+; RV32ZBB-NEXT:    mv a4, a7
+; RV32ZBB-NEXT:    mv a1, t0
+; RV32ZBB-NEXT:    mv a2, a6
+; RV32ZBB-NEXT:    mv a3, a5
+; RV32ZBB-NEXT:  .LBB53_12:
+; RV32ZBB-NEXT:    sw a4, 0(a0)
+; RV32ZBB-NEXT:    sw a1, 4(a0)
+; RV32ZBB-NEXT:    sw a2, 8(a0)
+; RV32ZBB-NEXT:    sw a3, 12(a0)
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i128 %x, %y
   %select = select i1 %cmp, i128 0, i128 %y
   %sub = sub nuw i128 %x, %select
@@ -1643,21 +1735,35 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a2, a2, a1
-; CHECK-NEXT:    sub a2, a0, a2
-; CHECK-NEXT:    bltu a0, a1, .LBB55_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    sll a0, a2, a0
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB55_2:
-; CHECK-NEXT:    li a0, 2
-; CHECK-NEXT:    sll a0, a2, a0
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a2, a2, a1
+; RV32I-NEXT:    sub a2, a0, a2
+; RV32I-NEXT:    bltu a0, a1, .LBB55_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    li a0, 4
+; RV32I-NEXT:    sll a0, a2, a0
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB55_2:
+; RV32I-NEXT:    li a0, 2
+; RV32I-NEXT:    sll a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sub a2, a0, a1
+; RV32ZBB-NEXT:    minu a2, a0, a2
+; RV32ZBB-NEXT:    bltu a0, a1, .LBB55_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    li a0, 4
+; RV32ZBB-NEXT:    sll a0, a2, a0
+; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:  .LBB55_2:
+; RV32ZBB-NEXT:    li a0, 2
+; RV32ZBB-NEXT:    sll a0, a2, a0
+; RV32ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 844f6796f10b6..b59cef2e8458b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1682,15 +1682,23 @@ entry:
 }
 
 define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: sub_if_uge_i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    zext.b a2, a1
-; CHECK-NEXT:    zext.b a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    zext.b a2, a1
+; RV64I-NEXT:    zext.b a3, a0
+; RV64I-NEXT:    sltu a2, a3, a2
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.b a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    zext.b a0, a0
+; RV64ZBB-NEXT:    minu a0, a2, a0
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i8 %x, %y
   %select = select i1 %cmp, i8 0, i8 %y
   %sub = sub nuw i8 %x, %select
@@ -1712,12 +1720,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 ;
 ; RV64ZBB-LABEL: sub_if_uge_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    zext.h a2, a1
-; RV64ZBB-NEXT:    zext.h a3, a0
-; RV64ZBB-NEXT:    sltu a2, a3, a2
-; RV64ZBB-NEXT:    addi a2, a2, -1
-; RV64ZBB-NEXT:    and a1, a2, a1
-; RV64ZBB-NEXT:    sub a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    minu a0, a2, a0
 ; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i16 %x, %y
   %select = select i1 %cmp, i16 0, i16 %y
@@ -1726,15 +1732,22 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
 }
 
 define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sext.w a2, a1
-; CHECK-NEXT:    sext.w a3, a0
-; CHECK-NEXT:    sltu a2, a3, a2
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    subw a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    sltu a2, a3, a2
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.w a2, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    minu a0, a2, a0
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select
@@ -1742,13 +1755,19 @@ define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
 }
 
 define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
-; CHECK-LABEL: sub_if_uge_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a2, a0, a1
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    and a1, a2, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sub a1, a0, a1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i64 %x, %y
   %select = select i1 %cmp, i64 0, i64 %y
   %sub = sub nuw i64 %x, %select
@@ -1756,23 +1775,43 @@ define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
 }
 
 define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
-; CHECK-LABEL: sub_if_uge_i128:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    beq a1, a3, .LBB66_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    sltu a4, a1, a3
-; CHECK-NEXT:    j .LBB66_3
-; CHECK-NEXT:  .LBB66_2:
-; CHECK-NEXT:    sltu a4, a0, a2
-; CHECK-NEXT:  .LBB66_3:
-; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    and a3, a4, a3
-; CHECK-NEXT:    and a2, a4, a2
-; CHECK-NEXT:    sltu a4, a0, a2
-; CHECK-NEXT:    sub a1, a1, a3
-; CHECK-NEXT:    sub a1, a1, a4
-; CHECK-NEXT:    sub a0, a0, a2
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_i128:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beq a1, a3, .LBB66_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    sltu a4, a1, a3
+; RV64I-NEXT:    j .LBB66_3
+; RV64I-NEXT:  .LBB66_2:
+; RV64I-NEXT:    sltu a4, a0, a2
+; RV64I-NEXT:  .LBB66_3:
+; RV64I-NEXT:    addi a4, a4, -1
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    and a2, a4, a2
+; RV64I-NEXT:    sltu a4, a0, a2
+; RV64I-NEXT:    sub a1, a1, a3
+; RV64I-NEXT:    sub a1, a1, a4
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i128:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sltu a4, a0, a2
+; RV64ZBB-NEXT:    sub a3, a1, a3
+; RV64ZBB-NEXT:    sub a3, a3, a4
+; RV64ZBB-NEXT:    sub a2, a0, a2
+; RV64ZBB-NEXT:    beq a1, a3, .LBB66_2
+; RV64ZBB-NEXT:  # %bb.1:
+; RV64ZBB-NEXT:    sltu a4, a1, a3
+; RV64ZBB-NEXT:    beqz a4, .LBB66_3
+; RV64ZBB-NEXT:    j .LBB66_4
+; RV64ZBB-NEXT:  .LBB66_2:
+; RV64ZBB-NEXT:    sltu a4, a0, a2
+; RV64ZBB-NEXT:    bnez a4, .LBB66_4
+; RV64ZBB-NEXT:  .LBB66_3:
+; RV64ZBB-NEXT:    mv a0, a2
+; RV64ZBB-NEXT:    mv a1, a3
+; RV64ZBB-NEXT:  .LBB66_4:
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i128 %x, %y
   %select = select i1 %cmp, i128 0, i128 %y
   %sub = sub nuw i128 %x, %select
@@ -1798,23 +1837,39 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sext.w a2, a1
-; CHECK-NEXT:    sext.w a3, a0
-; CHECK-NEXT:    sltu a4, a3, a2
-; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    and a1, a4, a1
-; CHECK-NEXT:    subw a0, a0, a1
-; CHECK-NEXT:    bltu a3, a2, .LBB68_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 4
-; CHECK-NEXT:    sllw a0, a0, a1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB68_2:
-; CHECK-NEXT:    li a1, 2
-; CHECK-NEXT:    sllw a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    sltu a4, a3, a2
+; RV64I-NEXT:    addi a4, a4, -1
+; RV64I-NEXT:    and a1, a4, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    bltu a3, a2, .LBB68_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    li a1, 4
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB68_2:
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.w a2, a1
+; RV64ZBB-NEXT:    sext.w a3, a0
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    minu a0, a3, a0
+; RV64ZBB-NEXT:    bltu a3, a2, .LBB68_2
+; RV64ZBB-NEXT:  # %bb.1:
+; RV64ZBB-NEXT:    li a1, 4
+; RV64ZBB-NEXT:    sllw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+; RV64ZBB-NEXT:  .LBB68_2:
+; RV64ZBB-NEXT:    li a1, 2
+; RV64ZBB-NEXT:    sllw a0, a0, a1
+; RV64ZBB-NEXT:    ret
   %cmp = icmp ult i32 %x, %y
   %select = select i1 %cmp, i32 0, i32 %y
   %sub = sub nuw i32 %x, %select

>From 75b5292de3b2266a6cd744108c55c817fd39f6a0 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Thu, 3 Apr 2025 19:15:27 +0200
Subject: [PATCH 2/5] [RISCV] Prevent transform on >XLEN types

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   6 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll          | 228 +++++++-------------
 llvm/test/CodeGen/RISCV/rv64zbb.ll          |  54 ++---
 3 files changed, 96 insertions(+), 192 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8002f7736ad30..f03ca2bd95e66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14917,7 +14917,11 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
     return V;
 
-  if (Subtarget.hasStdExtZbb()) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+  if ((LK.first == TargetLoweringBase::TypeLegal ||
+       LK.first == TargetLoweringBase::TypePromoteInteger) &&
+      TLI.isOperationLegal(ISD::UMIN, LK.second)) {
     // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
     using namespace llvm::SDPatternMatch;
     SDValue Y;
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 50b198443b3a8..f87057358f384 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1551,43 +1551,23 @@ define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
 }
 
 define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
-; RV32I-LABEL: sub_if_uge_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    beq a1, a3, .LBB52_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu a4, a1, a3
-; RV32I-NEXT:    j .LBB52_3
-; RV32I-NEXT:  .LBB52_2:
-; RV32I-NEXT:    sltu a4, a0, a2
-; RV32I-NEXT:  .LBB52_3:
-; RV32I-NEXT:    addi a4, a4, -1
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    sltu a4, a0, a2
-; RV32I-NEXT:    sub a1, a1, a3
-; RV32I-NEXT:    sub a1, a1, a4
-; RV32I-NEXT:    sub a0, a0, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: sub_if_uge_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    sltu a4, a0, a2
-; RV32ZBB-NEXT:    sub a3, a1, a3
-; RV32ZBB-NEXT:    sub a3, a3, a4
-; RV32ZBB-NEXT:    sub a2, a0, a2
-; RV32ZBB-NEXT:    beq a1, a3, .LBB52_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sltu a4, a1, a3
-; RV32ZBB-NEXT:    beqz a4, .LBB52_3
-; RV32ZBB-NEXT:    j .LBB52_4
-; RV32ZBB-NEXT:  .LBB52_2:
-; RV32ZBB-NEXT:    sltu a4, a0, a2
-; RV32ZBB-NEXT:    bnez a4, .LBB52_4
-; RV32ZBB-NEXT:  .LBB52_3:
-; RV32ZBB-NEXT:    mv a0, a2
-; RV32ZBB-NEXT:    mv a1, a3
-; RV32ZBB-NEXT:  .LBB52_4:
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: sub_if_uge_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    beq a1, a3, .LBB52_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    sltu a4, a1, a3
+; CHECK-NEXT:    j .LBB52_3
+; CHECK-NEXT:  .LBB52_2:
+; CHECK-NEXT:    sltu a4, a0, a2
+; CHECK-NEXT:  .LBB52_3:
+; CHECK-NEXT:    addi a4, a4, -1
+; CHECK-NEXT:    and a3, a4, a3
+; CHECK-NEXT:    and a2, a4, a2
+; CHECK-NEXT:    sltu a4, a0, a2
+; CHECK-NEXT:    sub a1, a1, a3
+; CHECK-NEXT:    sub a1, a1, a4
+; CHECK-NEXT:    sub a0, a0, a2
+; CHECK-NEXT:    ret
   %cmp = icmp ult i64 %x, %y
   %select = select i1 %cmp, i64 0, i64 %y
   %sub = sub nuw i64 %x, %select
@@ -1595,123 +1575,63 @@ define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
 }
 
 define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
-; RV32I-LABEL: sub_if_uge_i128:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a7, 4(a2)
-; RV32I-NEXT:    lw a6, 8(a2)
-; RV32I-NEXT:    lw t0, 12(a2)
-; RV32I-NEXT:    lw a4, 12(a1)
-; RV32I-NEXT:    lw a3, 4(a1)
-; RV32I-NEXT:    lw a5, 8(a1)
-; RV32I-NEXT:    beq a4, t0, .LBB53_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu t1, a4, t0
-; RV32I-NEXT:    j .LBB53_3
-; RV32I-NEXT:  .LBB53_2:
-; RV32I-NEXT:    sltu t1, a5, a6
-; RV32I-NEXT:  .LBB53_3:
-; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw a1, 0(a1)
-; RV32I-NEXT:    beq a3, a7, .LBB53_5
-; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    sltu t2, a3, a7
-; RV32I-NEXT:    j .LBB53_6
-; RV32I-NEXT:  .LBB53_5:
-; RV32I-NEXT:    sltu t2, a1, a2
-; RV32I-NEXT:  .LBB53_6:
-; RV32I-NEXT:    xor t3, a4, t0
-; RV32I-NEXT:    xor t4, a5, a6
-; RV32I-NEXT:    or t3, t4, t3
-; RV32I-NEXT:    beqz t3, .LBB53_8
-; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    mv t2, t1
-; RV32I-NEXT:  .LBB53_8:
-; RV32I-NEXT:    addi t2, t2, -1
-; RV32I-NEXT:    and t1, t2, t0
-; RV32I-NEXT:    and t0, t2, a2
-; RV32I-NEXT:    and a7, t2, a7
-; RV32I-NEXT:    sltu a2, a1, t0
-; RV32I-NEXT:    and t2, t2, a6
-; RV32I-NEXT:    mv a6, a2
-; RV32I-NEXT:    beq a3, a7, .LBB53_10
-; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    sltu a6, a3, a7
-; RV32I-NEXT:  .LBB53_10:
-; RV32I-NEXT:    sub t3, a5, t2
-; RV32I-NEXT:    sltu a5, a5, t2
-; RV32I-NEXT:    sub a4, a4, t1
-; RV32I-NEXT:    sub a3, a3, a7
-; RV32I-NEXT:    sub a1, a1, t0
-; RV32I-NEXT:    sltu a7, t3, a6
-; RV32I-NEXT:    sub a4, a4, a5
-; RV32I-NEXT:    sub a5, t3, a6
-; RV32I-NEXT:    sub a3, a3, a2
-; RV32I-NEXT:    sub a2, a4, a7
-; RV32I-NEXT:    sw a1, 0(a0)
-; RV32I-NEXT:    sw a3, 4(a0)
-; RV32I-NEXT:    sw a5, 8(a0)
-; RV32I-NEXT:    sw a2, 12(a0)
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: sub_if_uge_i128:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a7, 0(a2)
-; RV32ZBB-NEXT:    lw t0, 4(a2)
-; RV32ZBB-NEXT:    lw a5, 8(a2)
-; RV32ZBB-NEXT:    lw a6, 12(a2)
-; RV32ZBB-NEXT:    lw a2, 8(a1)
-; RV32ZBB-NEXT:    lw a3, 12(a1)
-; RV32ZBB-NEXT:    lw a4, 0(a1)
-; RV32ZBB-NEXT:    lw a1, 4(a1)
-; RV32ZBB-NEXT:    sltu t1, a2, a5
-; RV32ZBB-NEXT:    sub a6, a3, a6
-; RV32ZBB-NEXT:    sltu t2, a4, a7
-; RV32ZBB-NEXT:    sub a6, a6, t1
-; RV32ZBB-NEXT:    mv t1, t2
-; RV32ZBB-NEXT:    beq a1, t0, .LBB53_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sltu t1, a1, t0
-; RV32ZBB-NEXT:  .LBB53_2:
-; RV32ZBB-NEXT:    sub t3, a2, a5
-; RV32ZBB-NEXT:    sltu a5, t3, t1
-; RV32ZBB-NEXT:    sub a5, a6, a5
-; RV32ZBB-NEXT:    sub a6, t3, t1
-; RV32ZBB-NEXT:    beq a3, a5, .LBB53_4
-; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    sltu t1, a3, a5
-; RV32ZBB-NEXT:    j .LBB53_5
-; RV32ZBB-NEXT:  .LBB53_4:
-; RV32ZBB-NEXT:    sltu t1, a2, a6
-; RV32ZBB-NEXT:  .LBB53_5:
-; RV32ZBB-NEXT:    sub t0, a1, t0
-; RV32ZBB-NEXT:    sub t0, t0, t2
-; RV32ZBB-NEXT:    sub a7, a4, a7
-; RV32ZBB-NEXT:    beq a1, t0, .LBB53_7
-; RV32ZBB-NEXT:  # %bb.6:
-; RV32ZBB-NEXT:    sltu t2, a1, t0
-; RV32ZBB-NEXT:    j .LBB53_8
-; RV32ZBB-NEXT:  .LBB53_7:
-; RV32ZBB-NEXT:    sltu t2, a4, a7
-; RV32ZBB-NEXT:  .LBB53_8:
-; RV32ZBB-NEXT:    xor t3, a3, a5
-; RV32ZBB-NEXT:    xor t4, a2, a6
-; RV32ZBB-NEXT:    or t3, t4, t3
-; RV32ZBB-NEXT:    beqz t3, .LBB53_10
-; RV32ZBB-NEXT:  # %bb.9:
-; RV32ZBB-NEXT:    mv t2, t1
-; RV32ZBB-NEXT:  .LBB53_10:
-; RV32ZBB-NEXT:    bnez t2, .LBB53_12
-; RV32ZBB-NEXT:  # %bb.11:
-; RV32ZBB-NEXT:    mv a4, a7
-; RV32ZBB-NEXT:    mv a1, t0
-; RV32ZBB-NEXT:    mv a2, a6
-; RV32ZBB-NEXT:    mv a3, a5
-; RV32ZBB-NEXT:  .LBB53_12:
-; RV32ZBB-NEXT:    sw a4, 0(a0)
-; RV32ZBB-NEXT:    sw a1, 4(a0)
-; RV32ZBB-NEXT:    sw a2, 8(a0)
-; RV32ZBB-NEXT:    sw a3, 12(a0)
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: sub_if_uge_i128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a7, 4(a2)
+; CHECK-NEXT:    lw a6, 8(a2)
+; CHECK-NEXT:    lw t0, 12(a2)
+; CHECK-NEXT:    lw a4, 12(a1)
+; CHECK-NEXT:    lw a3, 4(a1)
+; CHECK-NEXT:    lw a5, 8(a1)
+; CHECK-NEXT:    beq a4, t0, .LBB53_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    sltu t1, a4, t0
+; CHECK-NEXT:    j .LBB53_3
+; CHECK-NEXT:  .LBB53_2:
+; CHECK-NEXT:    sltu t1, a5, a6
+; CHECK-NEXT:  .LBB53_3:
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    beq a3, a7, .LBB53_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    sltu t2, a3, a7
+; CHECK-NEXT:    j .LBB53_6
+; CHECK-NEXT:  .LBB53_5:
+; CHECK-NEXT:    sltu t2, a1, a2
+; CHECK-NEXT:  .LBB53_6:
+; CHECK-NEXT:    xor t3, a4, t0
+; CHECK-NEXT:    xor t4, a5, a6
+; CHECK-NEXT:    or t3, t4, t3
+; CHECK-NEXT:    beqz t3, .LBB53_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    mv t2, t1
+; CHECK-NEXT:  .LBB53_8:
+; CHECK-NEXT:    addi t2, t2, -1
+; CHECK-NEXT:    and t1, t2, t0
+; CHECK-NEXT:    and t0, t2, a2
+; CHECK-NEXT:    and a7, t2, a7
+; CHECK-NEXT:    sltu a2, a1, t0
+; CHECK-NEXT:    and t2, t2, a6
+; CHECK-NEXT:    mv a6, a2
+; CHECK-NEXT:    beq a3, a7, .LBB53_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    sltu a6, a3, a7
+; CHECK-NEXT:  .LBB53_10:
+; CHECK-NEXT:    sub t3, a5, t2
+; CHECK-NEXT:    sltu a5, a5, t2
+; CHECK-NEXT:    sub a4, a4, t1
+; CHECK-NEXT:    sub a3, a3, a7
+; CHECK-NEXT:    sub a1, a1, t0
+; CHECK-NEXT:    sltu a7, t3, a6
+; CHECK-NEXT:    sub a4, a4, a5
+; CHECK-NEXT:    sub a5, t3, a6
+; CHECK-NEXT:    sub a3, a3, a2
+; CHECK-NEXT:    sub a2, a4, a7
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    sw a3, 4(a0)
+; CHECK-NEXT:    sw a5, 8(a0)
+; CHECK-NEXT:    sw a2, 12(a0)
+; CHECK-NEXT:    ret
   %cmp = icmp ult i128 %x, %y
   %select = select i1 %cmp, i128 0, i128 %y
   %sub = sub nuw i128 %x, %select
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index b59cef2e8458b..4d3c28c733778 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1775,43 +1775,23 @@ define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
 }
 
 define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
-; RV64I-LABEL: sub_if_uge_i128:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    beq a1, a3, .LBB66_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    sltu a4, a1, a3
-; RV64I-NEXT:    j .LBB66_3
-; RV64I-NEXT:  .LBB66_2:
-; RV64I-NEXT:    sltu a4, a0, a2
-; RV64I-NEXT:  .LBB66_3:
-; RV64I-NEXT:    addi a4, a4, -1
-; RV64I-NEXT:    and a3, a4, a3
-; RV64I-NEXT:    and a2, a4, a2
-; RV64I-NEXT:    sltu a4, a0, a2
-; RV64I-NEXT:    sub a1, a1, a3
-; RV64I-NEXT:    sub a1, a1, a4
-; RV64I-NEXT:    sub a0, a0, a2
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: sub_if_uge_i128:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sltu a4, a0, a2
-; RV64ZBB-NEXT:    sub a3, a1, a3
-; RV64ZBB-NEXT:    sub a3, a3, a4
-; RV64ZBB-NEXT:    sub a2, a0, a2
-; RV64ZBB-NEXT:    beq a1, a3, .LBB66_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    sltu a4, a1, a3
-; RV64ZBB-NEXT:    beqz a4, .LBB66_3
-; RV64ZBB-NEXT:    j .LBB66_4
-; RV64ZBB-NEXT:  .LBB66_2:
-; RV64ZBB-NEXT:    sltu a4, a0, a2
-; RV64ZBB-NEXT:    bnez a4, .LBB66_4
-; RV64ZBB-NEXT:  .LBB66_3:
-; RV64ZBB-NEXT:    mv a0, a2
-; RV64ZBB-NEXT:    mv a1, a3
-; RV64ZBB-NEXT:  .LBB66_4:
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: sub_if_uge_i128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    beq a1, a3, .LBB66_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    sltu a4, a1, a3
+; CHECK-NEXT:    j .LBB66_3
+; CHECK-NEXT:  .LBB66_2:
+; CHECK-NEXT:    sltu a4, a0, a2
+; CHECK-NEXT:  .LBB66_3:
+; CHECK-NEXT:    addi a4, a4, -1
+; CHECK-NEXT:    and a3, a4, a3
+; CHECK-NEXT:    and a2, a4, a2
+; CHECK-NEXT:    sltu a4, a0, a2
+; CHECK-NEXT:    sub a1, a1, a3
+; CHECK-NEXT:    sub a1, a1, a4
+; CHECK-NEXT:    sub a0, a0, a2
+; CHECK-NEXT:    ret
   %cmp = icmp ult i128 %x, %y
   %select = select i1 %cmp, i128 0, i128 %y
   %sub = sub nuw i128 %x, %select

>From 130bbb6b57939086280fb0279e9419c18028e640 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Mon, 7 Apr 2025 16:17:37 +0200
Subject: [PATCH 3/5] [DAGCombiner] Move the transform from RISCVISelLowering

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +++++++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 17 -----------------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 38376de5783ae..c39a03d16163e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4251,6 +4251,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
       sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
     return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
 
+  // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+  if ((LK.first == TargetLoweringBase::TypeLegal ||
+       LK.first == TargetLoweringBase::TypePromoteInteger) &&
+      TLI.isOperationLegal(ISD::UMIN, LK.second)) {
+    SDValue Y;
+    if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+                                               m_SpecificCondCode(ISD::SETULT)),
+                                       m_Zero(), m_Deferred(Y)))))
+      return DAG.getNode(ISD::UMIN, DL, VT, N0,
+                         DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f03ca2bd95e66..c79b3f0b76390 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14917,23 +14917,6 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
     return V;
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
-  if ((LK.first == TargetLoweringBase::TypeLegal ||
-       LK.first == TargetLoweringBase::TypePromoteInteger) &&
-      TLI.isOperationLegal(ISD::UMIN, LK.second)) {
-    // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
-    using namespace llvm::SDPatternMatch;
-    SDValue Y;
-    if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
-                                               m_SpecificCondCode(ISD::SETULT)),
-                                       m_Zero(), m_Deferred(Y))))) {
-      SDLoc DL(N);
-      return DAG.getNode(ISD::UMIN, DL, VT, N0,
-                         DAG.getNode(ISD::SUB, DL, VT, N0, Y));
-    }
-  }
-
   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
   //      (select lhs, rhs, cc, x, (sub x, y))
   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);

>From d797364c454e01063ee4fe2ab8275552d7426d3d Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Thu, 10 Apr 2025 07:35:56 +0200
Subject: [PATCH 4/5] [RISCV][test] Add `uge` test for subtraction if above
 threshold

---
 llvm/test/CodeGen/RISCV/rv32zbb.ll | 18 ++++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64zbb.ll | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index f87057358f384..a9a686b7d60e4 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1691,3 +1691,21 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
   %shl = shl i32 %sub, %select2
   ret i32 %shl
 }
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
+; CHECK-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sltu a3, a0, a1
+; CHECK-NEXT:    xori a4, a3, 1
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a1, a3, a1
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    sw a4, 0(a2)
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x, %y
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %select = select i1 %cmp, i32 %y, i32 0
+  %sub = sub nuw i32 %x, %select
+  ret i32 %sub
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 4d3c28c733778..0396914919977 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1857,3 +1857,21 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
   %shl = shl i32 %sub, %select2
   ret i32 %shl
 }
+
+define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, ptr %z) {
+; CHECK-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sltu a3, a0, a1
+; CHECK-NEXT:    xori a4, a3, 1
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a1, a3, a1
+; CHECK-NEXT:    subw a0, a0, a1
+; CHECK-NEXT:    sw a4, 0(a2)
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x, %y
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %select = select i1 %cmp, i32 %y, i32 0
+  %sub = sub nuw i32 %x, %select
+  ret i32 %sub
+}

>From 0a1c55bf2ccb5a8c97000bb41b3e44efc5e2ec4f Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Thu, 10 Apr 2025 07:49:16 +0200
Subject: [PATCH 5/5] [RISCV] Handle a variant with inverse comparison

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++++-
 llvm/test/CodeGen/RISCV/rv32zbb.ll            | 27 ++++++++++++-------
 llvm/test/CodeGen/RISCV/rv64zbb.ll            | 27 ++++++++++++-------
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c39a03d16163e..967284944c658 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4252,6 +4252,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
 
   // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+  // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
   auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
   if ((LK.first == TargetLoweringBase::TypeLegal ||
        LK.first == TargetLoweringBase::TypePromoteInteger) &&
@@ -4259,7 +4260,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Y;
     if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
                                                m_SpecificCondCode(ISD::SETULT)),
-                                       m_Zero(), m_Deferred(Y)))))
+                                       m_Zero(), m_Deferred(Y)))) ||
+        sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+                                               m_SpecificCondCode(ISD::SETUGE)),
+                                       m_Deferred(Y), m_Zero()))))
       return DAG.getNode(ISD::UMIN, DL, VT, N0,
                          DAG.getNode(ISD::SUB, DL, VT, N0, Y));
   }
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index a9a686b7d60e4..5afc5ecb44098 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1693,15 +1693,24 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_store_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a3, a0, a1
-; CHECK-NEXT:    xori a4, a3, 1
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a1, a3, a1
-; CHECK-NEXT:    sub a0, a0, a1
-; CHECK-NEXT:    sw a4, 0(a2)
-; CHECK-NEXT:    ret
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a3, a0, a1
+; RV32I-NEXT:    xori a4, a3, 1
+; RV32I-NEXT:    addi a3, a3, -1
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    sw a4, 0(a2)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    sltu a3, a0, a1
+; RV32ZBB-NEXT:    sub a1, a0, a1
+; RV32ZBB-NEXT:    xori a3, a3, 1
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    sw a3, 0(a2)
+; RV32ZBB-NEXT:    ret
   %cmp = icmp uge i32 %x, %y
   %conv = zext i1 %cmp to i32
   store i32 %conv, ptr %z, align 4
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 0396914919977..2ae10da967754 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1859,15 +1859,24 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
 }
 
 define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, ptr %z) {
-; CHECK-LABEL: sub_if_uge_multiuse_cmp_store_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sltu a3, a0, a1
-; CHECK-NEXT:    xori a4, a3, 1
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a1, a3, a1
-; CHECK-NEXT:    subw a0, a0, a1
-; CHECK-NEXT:    sw a4, 0(a2)
-; CHECK-NEXT:    ret
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a3, a0, a1
+; RV64I-NEXT:    xori a4, a3, 1
+; RV64I-NEXT:    addi a3, a3, -1
+; RV64I-NEXT:    and a1, a3, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    sw a4, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sltu a3, a0, a1
+; RV64ZBB-NEXT:    subw a1, a0, a1
+; RV64ZBB-NEXT:    xori a3, a3, 1
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    sw a3, 0(a2)
+; RV64ZBB-NEXT:    ret
   %cmp = icmp uge i32 %x, %y
   %conv = zext i1 %cmp to i32
   store i32 %conv, ptr %z, align 4



More information about the llvm-commits mailing list