[llvm] 0b1318f - [DAG] Fold rem(rem(A, BCst), Op1Cst) -> rem(A, Op1Cst) (#159517)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 22 02:30:14 PDT 2025
Author: kper
Date: 2025-09-22T09:30:10Z
New Revision: 0b1318f2a88152c0d940d7919ea3b550fb2e34cb
URL: https://github.com/llvm/llvm-project/commit/0b1318f2a88152c0d940d7919ea3b550fb2e34cb
DIFF: https://github.com/llvm/llvm-project/commit/0b1318f2a88152c0d940d7919ea3b550fb2e34cb.diff
LOG: [DAG] Fold rem(rem(A, BCst), Op1Cst) -> rem(A, Op1Cst) (#159517)
Fixes [157370](https://github.com/llvm/llvm-project/issues/157370)
UREM General proof: https://alive2.llvm.org/ce/z/b_GQJX
SREM General proof: https://alive2.llvm.org/ce/z/Whkaxh
I added the tests for RISC-V (rv32i and rv64i) because those are the only targets on which I could verify that the fold works.
Added:
llvm/test/CodeGen/RISCV/srem.ll
llvm/test/CodeGen/RISCV/urem.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 91ae7b0b28ba7..a6ba6e518899f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5442,6 +5442,24 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
+ // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
+ // iff urem(BCst, Op1Cst) == 0
+ SDValue A;
+ APInt Op1Cst, BCst;
+ if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
+ m_ConstInt(Op1Cst))) &&
+ BCst.urem(Op1Cst).isZero()) {
+ return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
+ }
+
+ // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
+ // iff srem(BCst, Op1Cst) == 0 && Op1Cst != -1
+ if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
+ m_ConstInt(Op1Cst))) &&
+ BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
+ return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/srem.ll b/llvm/test/CodeGen/RISCV/srem.ll
new file mode 100644
index 0000000000000..55a1850f18291
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/srem.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
+
+define i32 @fold_srem_constants(i32 %v0) nounwind {
+; RV32I-LABEL: fold_srem_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: tail __modsi3
+;
+; RV64I-LABEL: fold_srem_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __moddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = srem i32 %v0, 25
+ %v2 = srem i32 %v1, 5
+ ret i32 %v2
+}
+
+define i32 @dont_fold_srem_constants(i32 %v0) nounwind {
+; RV32I-LABEL: dont_fold_srem_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 25
+; RV32I-NEXT: call __modsi3
+; RV32I-NEXT: li a1, 3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail __modsi3
+;
+; RV64I-LABEL: dont_fold_srem_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: call __moddi3
+; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: call __moddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = srem i32 %v0, 25
+ %v2 = srem i32 %v1, 3
+ ret i32 %v2
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/urem.ll b/llvm/test/CodeGen/RISCV/urem.ll
new file mode 100644
index 0000000000000..faeeb396a698b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/urem.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
+
+define i32 @fold_urem_constants(i32 %v0) nounwind {
+; RV32I-LABEL: fold_urem_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: tail __umodsi3
+;
+; RV64I-LABEL: fold_urem_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = urem i32 %v0, 25
+ %v2 = urem i32 %v1, 5
+ ret i32 %v2
+}
+
+define i32 @dont_fold_urem_constants(i32 %v0) nounwind {
+; RV32I-LABEL: dont_fold_urem_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 25
+; RV32I-NEXT: call __umodsi3
+; RV32I-NEXT: li a1, 3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail __umodsi3
+;
+; RV64I-LABEL: dont_fold_urem_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = urem i32 %v0, 25
+ %v2 = urem i32 %v1, 3
+ ret i32 %v2
+}
+
+define i32 @dont_fold_urem_srem_mixed_constants(i32 %v0) nounwind {
+; RV32I-LABEL: dont_fold_urem_srem_mixed_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 25
+; RV32I-NEXT: call __umodsi3
+; RV32I-NEXT: li a1, 3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail __umodsi3
+;
+; RV64I-LABEL: dont_fold_urem_srem_mixed_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = urem i32 %v0, 25
+ %v2 = srem i32 %v1, 3
+ ret i32 %v2
+}
+
+define i32 @dont_fold_srem_urem_mixed_constants(i32 %v0) nounwind {
+; RV32I-LABEL: dont_fold_srem_urem_mixed_constants:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 25
+; RV32I-NEXT: call __modsi3
+; RV32I-NEXT: li a1, 3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail __umodsi3
+;
+; RV64I-LABEL: dont_fold_srem_urem_mixed_constants:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: call __moddi3
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: call __umoddi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %v1 = srem i32 %v0, 25
+ %v2 = urem i32 %v1, 3
+ ret i32 %v2
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
More information about the llvm-commits
mailing list