[llvm] [DAGCombiner] Fold subtraction if above a constant threshold to `umin` (PR #135194)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 08:00:31 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Piotr Fusik (pfusik)
<details>
<summary>Changes</summary>
Like #134235, but with a constant.
This pattern is used in the Adler-32 checksum calculation in zlib.
Example:
unsigned adler32_mod(unsigned x) {
return x >= 65521u ? x - 65521u : x;
}
Before, on RISC-V:
lui a1, 16
lui a2, 1048560
addiw a1, a1, -16
sltu a1, a1, a0
negw a1, a1
addi a2, a2, 15
and a1, a1, a2
addw a0, a0, a1
Or, with Zicond:
lui a1, 16
lui a2, 1048560
addiw a1, a1, -16
sltu a1, a1, a0
addi a2, a2, 15
czero.eqz a1, a2, a1
addw a0, a0, a1
After, with Zbb:
lui a1, 1048560
addi a1, a1, 15
addw a1, a0, a1
minu a0, a1, a0
---
Full diff: https://github.com/llvm/llvm-project/pull/135194.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+14-4)
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+163)
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+162)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a28caa758659..46ab342d6f899 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -845,6 +845,13 @@ namespace {
return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
+ bool hasUMin(EVT VT) const {
+ auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+ return (LK.first == TargetLoweringBase::TypeLegal ||
+ LK.first == TargetLoweringBase::TypePromoteInteger) &&
+ TLI.isOperationLegal(ISD::UMIN, LK.second);
+ }
+
public:
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -4253,10 +4260,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
// (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
- auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
- if ((LK.first == TargetLoweringBase::TypeLegal ||
- LK.first == TargetLoweringBase::TypePromoteInteger) &&
- TLI.isOperationLegal(ISD::UMIN, LK.second)) {
+ if (hasUMin(VT)) {
SDValue Y;
if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
m_SpecificCondCode(ISD::SETULT)),
@@ -12074,6 +12078,12 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
return NewSel;
+
+ // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
+ APInt C;
+ if (CC == ISD::SETUGT && Cond0 == N2 && sd_match(Cond1, m_ConstInt(C)) &&
+ sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C))) && hasUMin(VT))
+ return DAG.getNode(ISD::UMIN, DL, VT, N1, N2);
}
if (!VT.isVector())
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 5afc5ecb44098..9c84d5c18e8f7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1718,3 +1718,166 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
%sub = sub nuw i32 %x, %select
ret i32 %sub
}
+
+define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
+; RV32I-LABEL: sub_if_uge_C_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltiu a1, a0, 13
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: andi a1, a1, -13
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a1, a0, -13
+; RV32ZBB-NEXT: zext.b a1, a1
+; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ugt i8 %x, 12
+ %sub = add i8 %x, -13
+ %conv4 = select i1 %cmp, i8 %sub, i8 %x
+ ret i8 %conv4
+}
+
+define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
+; RV32I-LABEL: sub_if_uge_C_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltiu a1, a0, 251
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: andi a1, a1, -251
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a1, a0, -251
+; RV32ZBB-NEXT: zext.h a1, a1
+; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ugt i16 %x, 250
+ %sub = add i16 %x, -251
+ %conv4 = select i1 %cmp, i16 %sub, i16 %x
+ ret i16 %conv4
+}
+
+define i32 @sub_if_uge_C_i32(i32 signext %x) {
+; RV32I-LABEL: sub_if_uge_C_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: lui a2, 1048560
+; RV32I-NEXT: addi a1, a1, -16
+; RV32I-NEXT: sltu a1, a1, a0
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: addi a2, a2, 15
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lui a1, 1048560
+; RV32ZBB-NEXT: addi a1, a1, 15
+; RV32ZBB-NEXT: add a1, a0, a1
+; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ugt i32 %x, 65520
+ %sub = add i32 %x, -65521
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
+
+define i64 @sub_if_uge_C_i64(i64 %x) {
+; CHECK-LABEL: sub_if_uge_C_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: beq a1, a2, .LBB60_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sltiu a2, a1, 2
+; CHECK-NEXT: xori a2, a2, 1
+; CHECK-NEXT: j .LBB60_3
+; CHECK-NEXT: .LBB60_2:
+; CHECK-NEXT: lui a2, 172127
+; CHECK-NEXT: addi a2, a2, 511
+; CHECK-NEXT: sltu a2, a2, a0
+; CHECK-NEXT: .LBB60_3:
+; CHECK-NEXT: neg a2, a2
+; CHECK-NEXT: andi a3, a2, -2
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: lui a3, 876449
+; CHECK-NEXT: addi a3, a3, -512
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i64 %x, 4999999999
+ %sub = add i64 %x, -5000000000
+ %cond = select i1 %cmp, i64 %sub, i64 %x
+ ret i64 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
+; RV32I-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: lui a3, 1048560
+; RV32I-NEXT: addi a2, a2, -16
+; RV32I-NEXT: sltu a2, a2, a0
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: addi a3, a3, 15
+; RV32I-NEXT: and a3, a4, a3
+; RV32I-NEXT: add a0, a0, a3
+; RV32I-NEXT: sw a2, 0(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lui a2, 16
+; RV32ZBB-NEXT: lui a3, 1048560
+; RV32ZBB-NEXT: addi a2, a2, -16
+; RV32ZBB-NEXT: addi a3, a3, 15
+; RV32ZBB-NEXT: sltu a2, a2, a0
+; RV32ZBB-NEXT: add a3, a0, a3
+; RV32ZBB-NEXT: minu a0, a3, a0
+; RV32ZBB-NEXT: sw a2, 0(a1)
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ugt i32 %x, 65520
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, ptr %z, align 4
+ %sub = add i32 %x, -65521
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
+; RV32I-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 1048560
+; RV32I-NEXT: lui a3, 16
+; RV32I-NEXT: addi a2, a2, 15
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: addi a3, a3, -16
+; RV32I-NEXT: sw a2, 0(a1)
+; RV32I-NEXT: bltu a3, a0, .LBB62_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: .LBB62_2:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lui a2, 1048560
+; RV32ZBB-NEXT: addi a2, a2, 15
+; RV32ZBB-NEXT: add a2, a0, a2
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: sw a2, 0(a1)
+; RV32ZBB-NEXT: ret
+ %sub = add i32 %x, -65521
+ store i32 %sub, ptr %z, align 4
+ %cmp = icmp ugt i32 %x, 65520
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 2ae10da967754..8a0743cd762a7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1884,3 +1884,165 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, pt
%sub = sub nuw i32 %x, %select
ret i32 %sub
}
+
+define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
+; RV64I-LABEL: sub_if_uge_C_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltiu a1, a0, 13
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: andi a1, a1, -13
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i8:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: addi a1, a0, -13
+; RV64ZBB-NEXT: zext.b a1, a1
+; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ugt i8 %x, 12
+ %sub = add i8 %x, -13
+ %conv4 = select i1 %cmp, i8 %sub, i8 %x
+ ret i8 %conv4
+}
+
+define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
+; RV64I-LABEL: sub_if_uge_C_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltiu a1, a0, 251
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: andi a1, a1, -251
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: addi a1, a0, -251
+; RV64ZBB-NEXT: zext.h a1, a1
+; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ugt i16 %x, 250
+ %sub = add i16 %x, -251
+ %conv4 = select i1 %cmp, i16 %sub, i16 %x
+ ret i16 %conv4
+}
+
+define i32 @sub_if_uge_C_i32(i32 signext %x) {
+; RV64I-LABEL: sub_if_uge_C_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: lui a2, 1048560
+; RV64I-NEXT: addiw a1, a1, -16
+; RV64I-NEXT: sltu a1, a1, a0
+; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: addi a2, a2, 15
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, 1048560
+; RV64ZBB-NEXT: addi a1, a1, 15
+; RV64ZBB-NEXT: addw a1, a0, a1
+; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ugt i32 %x, 65520
+ %sub = add i32 %x, -65521
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
+
+define i64 @sub_if_uge_C_i64(i64 %x) {
+; RV64I-LABEL: sub_if_uge_C_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 298
+; RV64I-NEXT: lui a2, 1046192
+; RV64I-NEXT: addiw a1, a1, 95
+; RV64I-NEXT: addiw a2, a2, -761
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 511
+; RV64I-NEXT: sltu a1, a1, a0
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: slli a2, a2, 9
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, 1046192
+; RV64ZBB-NEXT: addiw a1, a1, -761
+; RV64ZBB-NEXT: slli a1, a1, 9
+; RV64ZBB-NEXT: add a1, a0, a1
+; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ugt i64 %x, 4999999999
+ %sub = add i64 %x, -5000000000
+ %cond = select i1 %cmp, i64 %sub, i64 %x
+ ret i64 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
+; RV64I-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: lui a3, 1048560
+; RV64I-NEXT: addiw a2, a2, -16
+; RV64I-NEXT: sltu a2, a2, a0
+; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: addi a3, a3, 15
+; RV64I-NEXT: and a3, a4, a3
+; RV64I-NEXT: addw a0, a0, a3
+; RV64I-NEXT: sw a2, 0(a1)
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a2, 16
+; RV64ZBB-NEXT: lui a3, 1048560
+; RV64ZBB-NEXT: addiw a2, a2, -16
+; RV64ZBB-NEXT: addi a3, a3, 15
+; RV64ZBB-NEXT: sltu a2, a2, a0
+; RV64ZBB-NEXT: addw a3, a0, a3
+; RV64ZBB-NEXT: minu a0, a3, a0
+; RV64ZBB-NEXT: sw a2, 0(a1)
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ugt i32 %x, 65520
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, ptr %z, align 4
+ %sub = add i32 %x, -65521
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
+; RV64I-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 1048560
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addi a2, a2, 15
+; RV64I-NEXT: addw a2, a0, a2
+; RV64I-NEXT: addiw a3, a3, -16
+; RV64I-NEXT: sw a2, 0(a1)
+; RV64I-NEXT: bltu a3, a0, .LBB75_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a2, a0
+; RV64I-NEXT: .LBB75_2:
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a2, 1048560
+; RV64ZBB-NEXT: addi a2, a2, 15
+; RV64ZBB-NEXT: addw a2, a0, a2
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: sw a2, 0(a1)
+; RV64ZBB-NEXT: ret
+ %sub = add i32 %x, -65521
+ store i32 %sub, ptr %z, align 4
+ %cmp = icmp ugt i32 %x, 65520
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ ret i32 %cond
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/135194
More information about the llvm-commits
mailing list