[llvm] [LegalizeTypes] Expand 128-bit UDIV/UREM by constant via Chunk Addition (PR #146238)
Shivam Gupta via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 1 06:43:24 PST 2026
https://github.com/xgupta updated https://github.com/llvm/llvm-project/pull/146238
>From bd5cfc1868b8fc87849c5925fea718276dcd1eab Mon Sep 17 00:00:00 2001
From: Shivam Gupta <shivam98.tkg at gmail.com>
Date: Fri, 27 Jun 2025 19:01:03 +0530
Subject: [PATCH 1/4] [LegalizeTypes] Expand 128-bit UDIV/UREM by constant via
Chunk Addition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch improves the lowering of 128-bit unsigned division and remainder
by constants (UDIV/UREM) by avoiding a fallback to libcalls (__udivti3/__umodti3)
for specific divisors.
When a divisor D satisfies the condition (1 << ChunkWidth) % D == 1, the
128-bit value is split into fixed-width chunks (e.g., 30-bit) whose sum is
computed before applying a smaller UDIV/UREM. This transformation is based on the
"remainder by summing digits" trick described in Hacker’s Delight.
This fixes PR137514 for some constants.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 76 ++++++-
llvm/test/CodeGen/RISCV/div-by-constant.ll | 80 ++++++-
.../CodeGen/RISCV/split-udiv-by-constant.ll | 183 ++++++++++++---
.../CodeGen/RISCV/split-urem-by-constant.ll | 135 ++++++++---
llvm/test/CodeGen/X86/divide-by-constant.ll | 42 +++-
llvm/test/CodeGen/X86/divmod128.ll | 81 +++++--
llvm/test/CodeGen/X86/uint128-div-const.ll | 210 ++++++++++++++++++
7 files changed, 717 insertions(+), 90 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/uint128-div-const.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cc719b1e67f53..4f166d2c3d9f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8151,8 +8151,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
// then add in the carry.
- // TODO: If we can't split it in half, we might be able to split into 3 or
- // more pieces using a smaller bit width.
if (HalfMaxPlus1.urem(Divisor).isOne()) {
assert(!LL == !LH && "Expected both input halves or no input halves!");
if (!LL)
@@ -8200,6 +8198,80 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getConstant(0, dl, HiLoVT));
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
}
+
+ } else {
+ // If we cannot split in two halves. Let's look for a smaller chunk
+ // width where (1 << ChunkWidth) mod Divisor == 1.
+ // This ensures that the sum of all such chunks modulo Divisor
+ // is equivalent to the original value modulo Divisor.
+ const APInt &Divisor = CN->getAPIntValue();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned BestChunkWidth = 0;
+
+ // We restrict to small chunk sizes (e.g., <= 32 bits) to ensure that all
+ // operations remain legal on most targets.
+ unsigned MaxChunk = 32;
+ for (int i = MaxChunk; i >= 1; --i) {
+ APInt ChunkMaxPlus1 = APInt::getOneBitSet(BitWidth, i);
+ if (ChunkMaxPlus1.urem(Divisor).isOne()) {
+ BestChunkWidth = i;
+ break;
+ }
+ }
+
+ // If we found a good chunk width, slice the number and sum the pieces.
+ if (BestChunkWidth > 0) {
+ EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
+
+ if (!LL)
+ std::tie(LL, LH) =
+ DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
+ SDValue In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+
+ SmallVector<SDValue, 8> Parts;
+ // Split into fixed-size chunks
+ for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) {
+ SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl);
+ SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
+ Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk);
+ Parts.push_back(Chunk);
+ }
+ if (Parts.empty())
+ return false;
+ Sum = Parts[0];
+
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
+ // This mirrors the carry-propagation logic of the half-split case above.
+ SDValue Carry = DAG.getConstant(0, dl, ChunkVT);
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT);
+ for (unsigned i = 1; i < Parts.size(); ++i) {
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) {
+ SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType);
+ SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]);
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Carry,
+ UAdd.getValue(1));
+ } else {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
+ SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
+
+ if (getBooleanContents(ChunkVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
+ else
+ NewCarry = DAG.getSelect(dl, ChunkVT, NewCarry,
+ DAG.getConstant(1, dl, ChunkVT),
+ DAG.getConstant(0, dl, ChunkVT));
+
+ Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry);
+ Carry = NewCarry;
+ }
+ }
+
+ Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum);
+ } else {
+ return false;
+ }
}
// If we didn't find a sum, we can't do the expansion.
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 53c3f5841ba0f..5147778d2a926 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -115,16 +115,76 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind {
}
define i64 @udiv64_constant_add(i64 %a) nounwind {
-; RV32-LABEL: udiv64_constant_add:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 7
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32IM-LABEL: udiv64_constant_add:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: lui a2, 262144
+; RV32IM-NEXT: slli a3, a1, 2
+; RV32IM-NEXT: srli a4, a0, 30
+; RV32IM-NEXT: srli a5, a1, 28
+; RV32IM-NEXT: lui a6, 149797
+; RV32IM-NEXT: addi a2, a2, -1
+; RV32IM-NEXT: or a3, a4, a3
+; RV32IM-NEXT: and a4, a0, a2
+; RV32IM-NEXT: add a3, a0, a3
+; RV32IM-NEXT: add a5, a3, a5
+; RV32IM-NEXT: and a3, a3, a2
+; RV32IM-NEXT: sltu a3, a3, a4
+; RV32IM-NEXT: lui a4, 449390
+; RV32IM-NEXT: add a3, a5, a3
+; RV32IM-NEXT: lui a5, 748983
+; RV32IM-NEXT: addi a6, a6, -1755
+; RV32IM-NEXT: addi a4, a4, -1171
+; RV32IM-NEXT: addi a5, a5, -585
+; RV32IM-NEXT: and a2, a3, a2
+; RV32IM-NEXT: mulhu a3, a2, a6
+; RV32IM-NEXT: slli a6, a3, 3
+; RV32IM-NEXT: add a2, a2, a3
+; RV32IM-NEXT: sub a2, a2, a6
+; RV32IM-NEXT: sub a3, a0, a2
+; RV32IM-NEXT: sltu a0, a0, a2
+; RV32IM-NEXT: mul a2, a3, a4
+; RV32IM-NEXT: mulhu a4, a3, a5
+; RV32IM-NEXT: sub a1, a1, a0
+; RV32IM-NEXT: add a2, a4, a2
+; RV32IM-NEXT: mul a1, a1, a5
+; RV32IM-NEXT: add a1, a2, a1
+; RV32IM-NEXT: mul a0, a3, a5
+; RV32IM-NEXT: ret
+;
+; RV32IMZB-LABEL: udiv64_constant_add:
+; RV32IMZB: # %bb.0:
+; RV32IMZB-NEXT: srli a2, a0, 30
+; RV32IMZB-NEXT: srli a3, a1, 28
+; RV32IMZB-NEXT: lui a4, 786432
+; RV32IMZB-NEXT: slli a5, a0, 2
+; RV32IMZB-NEXT: lui a6, 149797
+; RV32IMZB-NEXT: sh2add a2, a1, a2
+; RV32IMZB-NEXT: srli a5, a5, 2
+; RV32IMZB-NEXT: add a2, a0, a2
+; RV32IMZB-NEXT: add a3, a2, a3
+; RV32IMZB-NEXT: andn a2, a2, a4
+; RV32IMZB-NEXT: sltu a2, a2, a5
+; RV32IMZB-NEXT: lui a5, 449390
+; RV32IMZB-NEXT: add a2, a3, a2
+; RV32IMZB-NEXT: lui a3, 748983
+; RV32IMZB-NEXT: addi a6, a6, -1755
+; RV32IMZB-NEXT: addi a5, a5, -1171
+; RV32IMZB-NEXT: addi a3, a3, -585
+; RV32IMZB-NEXT: andn a2, a2, a4
+; RV32IMZB-NEXT: mulhu a4, a2, a6
+; RV32IMZB-NEXT: slli a6, a4, 3
+; RV32IMZB-NEXT: add a2, a2, a4
+; RV32IMZB-NEXT: sub a2, a2, a6
+; RV32IMZB-NEXT: sub a4, a0, a2
+; RV32IMZB-NEXT: sltu a0, a0, a2
+; RV32IMZB-NEXT: mul a2, a4, a5
+; RV32IMZB-NEXT: mulhu a5, a4, a3
+; RV32IMZB-NEXT: sub a1, a1, a0
+; RV32IMZB-NEXT: add a2, a5, a2
+; RV32IMZB-NEXT: mul a1, a1, a3
+; RV32IMZB-NEXT: add a1, a2, a1
+; RV32IMZB-NEXT: mul a0, a4, a3
+; RV32IMZB-NEXT: ret
;
; RV64-LABEL: udiv64_constant_add:
; RV64: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
index eb70d7f43c0ef..8250fc3a176e2 100644
--- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
@@ -117,24 +117,89 @@ define iXLen2 @test_udiv_5(iXLen2 %x) nounwind {
define iXLen2 @test_udiv_7(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_7:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 7
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, 262144
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: srli a4, a0, 30
+; RV32-NEXT: srli a5, a1, 28
+; RV32-NEXT: lui a6, 149797
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: and a4, a0, a2
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: add a5, a3, a5
+; RV32-NEXT: and a3, a3, a2
+; RV32-NEXT: sltu a3, a3, a4
+; RV32-NEXT: lui a4, 449390
+; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: lui a5, 748983
+; RV32-NEXT: addi a6, a6, -1755
+; RV32-NEXT: addi a4, a4, -1171
+; RV32-NEXT: addi a5, a5, -585
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: mulhu a3, a2, a6
+; RV32-NEXT: slli a6, a3, 3
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sub a2, a2, a6
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: mul a2, a3, a4
+; RV32-NEXT: mulhu a4, a3, a5
+; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: mul a1, a1, a5
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: mul a0, a3, a5
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_7:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 7
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: srli a3, a0, 60
+; RV64-NEXT: slli a4, a1, 34
+; RV64-NEXT: srli a5, a0, 30
+; RV64-NEXT: lui a6, 262144
+; RV64-NEXT: srli a7, a1, 26
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: lui a3, 748983
+; RV64-NEXT: or a4, a5, a4
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: addi a3, a3, -585
+; RV64-NEXT: add a4, a0, a4
+; RV64-NEXT: slli a5, a3, 33
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: and a5, a0, a6
+; RV64-NEXT: add a2, a4, a2
+; RV64-NEXT: and a4, a4, a6
+; RV64-NEXT: sltu a5, a4, a5
+; RV64-NEXT: add a5, a2, a5
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: sltu a2, a2, a4
+; RV64-NEXT: srli a4, a1, 56
+; RV64-NEXT: add a2, a2, a4
+; RV64-NEXT: lui a4, %hi(.LCPI2_0)
+; RV64-NEXT: add a7, a5, a7
+; RV64-NEXT: and a5, a5, a6
+; RV64-NEXT: add a2, a7, a2
+; RV64-NEXT: and a7, a7, a6
+; RV64-NEXT: sltu a5, a7, a5
+; RV64-NEXT: lui a7, %hi(.LCPI2_1)
+; RV64-NEXT: ld a4, %lo(.LCPI2_0)(a4)
+; RV64-NEXT: ld a7, %lo(.LCPI2_1)(a7)
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: mulhu a4, a2, a4
+; RV64-NEXT: slli a5, a4, 3
+; RV64-NEXT: add a2, a2, a4
+; RV64-NEXT: sub a2, a2, a5
+; RV64-NEXT: sub a4, a0, a2
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: mul a2, a4, a7
+; RV64-NEXT: mulhu a5, a4, a3
+; RV64-NEXT: sub a1, a1, a0
+; RV64-NEXT: add a2, a5, a2
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: mul a0, a4, a3
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 7
ret iXLen2 %a
@@ -143,24 +208,86 @@ define iXLen2 @test_udiv_7(iXLen2 %x) nounwind {
define iXLen2 @test_udiv_9(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_9:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 9
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, 262144
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: srli a4, a0, 30
+; RV32-NEXT: srli a5, a1, 28
+; RV32-NEXT: lui a6, 233017
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: and a4, a0, a2
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: add a5, a3, a5
+; RV32-NEXT: and a3, a3, a2
+; RV32-NEXT: sltu a3, a3, a4
+; RV32-NEXT: lui a4, 582542
+; RV32-NEXT: addi a6, a6, -455
+; RV32-NEXT: addi a4, a4, 910
+; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: mulhu a3, a2, a6
+; RV32-NEXT: srli a3, a3, 1
+; RV32-NEXT: slli a5, a3, 3
+; RV32-NEXT: sub a2, a2, a3
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: mul a2, a3, a4
+; RV32-NEXT: mulhu a4, a3, a6
+; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: mul a0, a3, a6
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_9:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 9
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: srli a3, a0, 60
+; RV64-NEXT: slli a4, a1, 34
+; RV64-NEXT: srli a5, a0, 30
+; RV64-NEXT: lui a6, 262144
+; RV64-NEXT: srli a7, a1, 26
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: srli a3, a1, 56
+; RV64-NEXT: or a4, a5, a4
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: add a4, a0, a4
+; RV64-NEXT: and a5, a0, a6
+; RV64-NEXT: add a2, a4, a2
+; RV64-NEXT: and a4, a4, a6
+; RV64-NEXT: sltu a5, a4, a5
+; RV64-NEXT: add a5, a2, a5
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: sltu a2, a2, a4
+; RV64-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: lui a3, %hi(.LCPI3_1)
+; RV64-NEXT: add a7, a5, a7
+; RV64-NEXT: and a5, a5, a6
+; RV64-NEXT: add a2, a7, a2
+; RV64-NEXT: and a7, a7, a6
+; RV64-NEXT: sltu a5, a7, a5
+; RV64-NEXT: lui a7, %hi(.LCPI3_2)
+; RV64-NEXT: ld a4, %lo(.LCPI3_0)(a4)
+; RV64-NEXT: ld a3, %lo(.LCPI3_1)(a3)
+; RV64-NEXT: ld a7, %lo(.LCPI3_2)(a7)
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: mulhu a4, a2, a4
+; RV64-NEXT: slli a5, a4, 3
+; RV64-NEXT: sub a2, a2, a4
+; RV64-NEXT: sub a2, a2, a5
+; RV64-NEXT: sub a4, a0, a2
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: mul a2, a4, a3
+; RV64-NEXT: mulhu a3, a4, a7
+; RV64-NEXT: sub a1, a1, a0
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: mul a1, a1, a7
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: mul a0, a4, a7
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 9
ret iXLen2 %a
diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
index bc4a99a00ac64..1680ea7d8da30 100644
--- a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
@@ -79,24 +79,63 @@ define iXLen2 @test_urem_5(iXLen2 %x) nounwind {
define iXLen2 @test_urem_7(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_7:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 7
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, 262144
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: srli a4, a0, 30
+; RV32-NEXT: srli a1, a1, 28
+; RV32-NEXT: lui a5, 149797
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: addi a4, a5, -1755
+; RV32-NEXT: and a5, a0, a2
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: and a3, a0, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sltu a1, a3, a5
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: mulhu a1, a0, a4
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_7:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 7
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: srli a3, a0, 60
+; RV64-NEXT: slli a4, a1, 34
+; RV64-NEXT: srli a5, a0, 30
+; RV64-NEXT: lui a6, 262144
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: srli a3, a1, 26
+; RV64-NEXT: srli a1, a1, 56
+; RV64-NEXT: or a4, a5, a4
+; RV64-NEXT: lui a5, %hi(.LCPI2_0)
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: ld a5, %lo(.LCPI2_0)(a5)
+; RV64-NEXT: add a4, a0, a4
+; RV64-NEXT: and a0, a0, a6
+; RV64-NEXT: add a2, a4, a2
+; RV64-NEXT: and a4, a4, a6
+; RV64-NEXT: sltu a0, a4, a0
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: sltu a2, a2, a4
+; RV64-NEXT: and a4, a0, a6
+; RV64-NEXT: add a0, a0, a3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: and a2, a0, a6
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: sltu a1, a2, a4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, a6
+; RV64-NEXT: mulhu a1, a0, a5
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: sub a0, a0, a2
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 7
ret iXLen2 %a
@@ -105,24 +144,64 @@ define iXLen2 @test_urem_7(iXLen2 %x) nounwind {
define iXLen2 @test_urem_9(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_9:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 9
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, 262144
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: srli a4, a0, 30
+; RV32-NEXT: srli a1, a1, 28
+; RV32-NEXT: lui a5, 233017
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: addi a4, a5, -455
+; RV32-NEXT: and a5, a0, a2
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: and a3, a0, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sltu a1, a3, a5
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: mulhu a1, a0, a4
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_9:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 9
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: srli a3, a0, 60
+; RV64-NEXT: slli a4, a1, 34
+; RV64-NEXT: srli a5, a0, 30
+; RV64-NEXT: lui a6, 262144
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: srli a3, a1, 26
+; RV64-NEXT: srli a1, a1, 56
+; RV64-NEXT: or a4, a5, a4
+; RV64-NEXT: lui a5, %hi(.LCPI3_0)
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: ld a5, %lo(.LCPI3_0)(a5)
+; RV64-NEXT: add a4, a0, a4
+; RV64-NEXT: and a0, a0, a6
+; RV64-NEXT: add a2, a4, a2
+; RV64-NEXT: and a4, a4, a6
+; RV64-NEXT: sltu a0, a4, a0
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: and a2, a2, a6
+; RV64-NEXT: sltu a2, a2, a4
+; RV64-NEXT: and a4, a0, a6
+; RV64-NEXT: add a0, a0, a3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: and a2, a0, a6
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: sltu a1, a2, a4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, a6
+; RV64-NEXT: mulhu a1, a0, a5
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: sub a0, a0, a2
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 9
ret iXLen2 %a
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index ac78136b9d8ea..f4f99749969e9 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -294,19 +294,47 @@ entry:
define i64 @PR23590(i64 %x) nounwind {
; X86-LABEL: PR23590:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
; X86-NEXT: pushl $0
; X86-NEXT: pushl $12345 # imm = 0x3039
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $7
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: shldl $2, %esi, %eax
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: shrl $28, %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: andl $1073741823, %ebx # imm = 0x3FFFFFFF
+; X86-NEXT: cmpl %ebx, %edx
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF
+; X86-NEXT: movl $613566757, %edx # imm = 0x24924925
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: leal (,%edx,8), %eax
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: subl %edx, %esi
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl $-1227133513, %edx # imm = 0xB6DB6DB7
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: imull $1840700269, %esi, %esi # imm = 0x6DB6DB6D
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: imull $-1227133513, %ecx, %ecx # imm = 0xB6DB6DB7
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-FAST-LABEL: PR23590:
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll
index 3796dd796eaf9..16865030cfc36 100644
--- a/llvm/test/CodeGen/X86/divmod128.ll
+++ b/llvm/test/CodeGen/X86/divmod128.ll
@@ -67,25 +67,76 @@ define i64 @div128(i128 %x) nounwind {
define i64 @umod128(i128 %x) nounwind {
; X86-64-LABEL: umod128:
; X86-64: # %bb.0:
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $11, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3 at PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rsi, %rax
+; X86-64-NEXT: shldq $4, %rdi, %rax
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: shrq $30, %rcx
+; X86-64-NEXT: addl %edi, %ecx
+; X86-64-NEXT: movl %ecx, %edx
+; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
+; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF
+; X86-64-NEXT: cmpl %edi, %edx
+; X86-64-NEXT: movl %ecx, %edi
+; X86-64-NEXT: adcl %eax, %edi
+; X86-64-NEXT: addl %eax, %ecx
+; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; X86-64-NEXT: movq %rsi, %rax
+; X86-64-NEXT: shrq $26, %rax
+; X86-64-NEXT: cmpl %edx, %ecx
+; X86-64-NEXT: movl %edi, %ecx
+; X86-64-NEXT: adcl %eax, %ecx
+; X86-64-NEXT: movl %edi, %edx
+; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
+; X86-64-NEXT: addl %eax, %edi
+; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF
+; X86-64-NEXT: shrq $56, %rsi
+; X86-64-NEXT: cmpl %edx, %edi
+; X86-64-NEXT: adcl %esi, %ecx
+; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; X86-64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
+; X86-64-NEXT: leaq (%rdx,%rax,2), %rax
+; X86-64-NEXT: subq %rax, %rcx
+; X86-64-NEXT: movq %rcx, %rax
; X86-64-NEXT: retq
;
; WIN64-LABEL: umod128:
; WIN64: # %bb.0:
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $11, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shldq $4, %rcx, %rax
+; WIN64-NEXT: movq %rcx, %r8
+; WIN64-NEXT: shrq $30, %r8
+; WIN64-NEXT: addl %ecx, %r8d
+; WIN64-NEXT: movl %r8d, %r9d
+; WIN64-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
+; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; WIN64-NEXT: cmpl %ecx, %r9d
+; WIN64-NEXT: movl %r8d, %r10d
+; WIN64-NEXT: adcl %eax, %r10d
+; WIN64-NEXT: addl %eax, %r8d
+; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shrq $26, %rax
+; WIN64-NEXT: cmpl %r9d, %r8d
+; WIN64-NEXT: movl %r10d, %ecx
+; WIN64-NEXT: adcl %eax, %ecx
+; WIN64-NEXT: movl %r10d, %r8d
+; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; WIN64-NEXT: addl %eax, %r10d
+; WIN64-NEXT: andl $1073741823, %r10d # imm = 0x3FFFFFFF
+; WIN64-NEXT: shrq $56, %rdx
+; WIN64-NEXT: cmpl %r8d, %r10d
+; WIN64-NEXT: adcl %edx, %ecx
+; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; WIN64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
+; WIN64-NEXT: leaq (%rdx,%rax,2), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/uint128-div-const.ll b/llvm/test/CodeGen/X86/uint128-div-const.ll
new file mode 100644
index 0000000000000..60dddad952679
--- /dev/null
+++ b/llvm/test/CodeGen/X86/uint128-div-const.ll
@@ -0,0 +1,210 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s
+
+define i128 @div_by_7(i128 %x) {
+; CHECK-LABEL: div_by_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $4, %rdi, %rax
+; CHECK-NEXT: movq %rdi, %rcx
+; CHECK-NEXT: shrq $30, %rcx
+; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movl %edi, %r8d
+; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: cmpl %r8d, %edx
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: adcl %eax, %r8d
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $26, %rax
+; CHECK-NEXT: cmpl %edx, %ecx
+; CHECK-NEXT: movl %r8d, %edx
+; CHECK-NEXT: adcl %eax, %edx
+; CHECK-NEXT: movl %r8d, %r9d
+; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
+; CHECK-NEXT: addl %eax, %r8d
+; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: shrq $56, %rcx
+; CHECK-NEXT: cmpl %r9d, %r8d
+; CHECK-NEXT: adcl %edx, %ecx
+; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: mulq %rdx
+; CHECK-NEXT: leal (,%rdx,8), %eax
+; CHECK-NEXT: subq %rax, %rdx
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: subq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rsi
+; CHECK-NEXT: movabsq $-5270498306774157605, %rcx # imm = 0xB6DB6DB6DB6DB6DB
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: movabsq $7905747460161236407, %r8 # imm = 0x6DB6DB6DB6DB6DB7
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: mulq %r8
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: imulq %rsi, %r8
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: retq
+entry:
+ %div = udiv i128 %x, 7
+ ret i128 %div
+}
+
+define i128 @div_by_9(i128 %x) {
+; CHECK-LABEL: div_by_9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $4, %rdi, %rax
+; CHECK-NEXT: movq %rdi, %rcx
+; CHECK-NEXT: shrq $30, %rcx
+; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movl %edi, %r8d
+; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: cmpl %r8d, %edx
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: adcl %eax, %r8d
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $26, %rax
+; CHECK-NEXT: cmpl %edx, %ecx
+; CHECK-NEXT: movl %r8d, %edx
+; CHECK-NEXT: adcl %eax, %edx
+; CHECK-NEXT: movl %r8d, %r9d
+; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
+; CHECK-NEXT: addl %eax, %r8d
+; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: shrq $56, %rcx
+; CHECK-NEXT: cmpl %r9d, %r8d
+; CHECK-NEXT: adcl %edx, %ecx
+; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; CHECK-NEXT: movabsq $2049638230412172402, %rdx # imm = 0x1C71C71C71C71C72
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: mulq %rdx
+; CHECK-NEXT: leaq (%rdx,%rdx,8), %rax
+; CHECK-NEXT: subq %rax, %rcx
+; CHECK-NEXT: subq %rcx, %rdi
+; CHECK-NEXT: sbbq $0, %rsi
+; CHECK-NEXT: movabsq $4099276460824344803, %rcx # imm = 0x38E38E38E38E38E3
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: movabsq $-8198552921648689607, %r8 # imm = 0x8E38E38E38E38E39
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: mulq %r8
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: imulq %rsi, %r8
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: retq
+entry:
+ %div = udiv i128 %x, 9
+ ret i128 %div
+}
+
+define i128 @div_by_25(i128 %x) {
+; CHECK-LABEL: div_by_25:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $24, %rdi, %rax
+; CHECK-NEXT: movq %rdi, %rcx
+; CHECK-NEXT: shrq $20, %rcx
+; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
+; CHECK-NEXT: movl %edi, %r8d
+; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
+; CHECK-NEXT: cmpl %r8d, %edx
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: adcl %eax, %r8d
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $4, %rdi, %rax
+; CHECK-NEXT: cmpl %edx, %ecx
+; CHECK-NEXT: movl %r8d, %ecx
+; CHECK-NEXT: adcl %eax, %ecx
+; CHECK-NEXT: movl %r8d, %edx
+; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
+; CHECK-NEXT: addl %eax, %r8d
+; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $16, %rax
+; CHECK-NEXT: cmpl %edx, %r8d
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: adcl %eax, %edx
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $36, %rax
+; CHECK-NEXT: cmpl %r8d, %ecx
+; CHECK-NEXT: movl %edx, %r8d
+; CHECK-NEXT: adcl %eax, %r8d
+; CHECK-NEXT: movl %edx, %r9d
+; CHECK-NEXT: andl $1048575, %r9d # imm = 0xFFFFF
+; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: shrq $56, %rcx
+; CHECK-NEXT: cmpl %r9d, %edx
+; CHECK-NEXT: adcl %r8d, %ecx
+; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
+; CHECK-NEXT: movabsq $737869762948382065, %rdx # imm = 0xA3D70A3D70A3D71
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: mulq %rdx
+; CHECK-NEXT: leaq (%rdx,%rdx,4), %rax
+; CHECK-NEXT: leaq (%rax,%rax,4), %rax
+; CHECK-NEXT: subq %rax, %rcx
+; CHECK-NEXT: subq %rcx, %rdi
+; CHECK-NEXT: sbbq $0, %rsi
+; CHECK-NEXT: movabsq $2951479051793528258, %rcx # imm = 0x28F5C28F5C28F5C2
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: movabsq $-8116567392432202711, %r8 # imm = 0x8F5C28F5C28F5C29
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: mulq %r8
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: imulq %rsi, %r8
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: retq
+entry:
+ %div = udiv i128 %x, 25
+ ret i128 %div
+}
+
+define i128 @div_by_14(i128 %x) {
+; CHECK-LABEL: div_by_14:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movl $14, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: callq __udivti3 at PLT
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %div = udiv i128 %x, 14
+ ret i128 %div
+}
+
+define i128 @div_by_22(i128 %x) {
+; CHECK-LABEL: div_by_22:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movl $22, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: callq __udivti3 at PLT
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %div = udiv i128 %x, 22
+ ret i128 %div
+}
>From cbf63f7e26c4b28bba87c863766cb0855d927213 Mon Sep 17 00:00:00 2001
From: Shivam Gupta <shivam98.tkg at gmail.com>
Date: Sun, 1 Mar 2026 19:45:52 +0530
Subject: [PATCH 2/4] Address Review comments
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 44 +++---
llvm/test/CodeGen/AArch64/rem-by-const.ll | 80 ++++++-----
llvm/test/CodeGen/X86/divmod128.ll | 80 ++++-------
llvm/test/CodeGen/X86/uint128-div-const.ll | 136 +++++-------------
4 files changed, 135 insertions(+), 205 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4f166d2c3d9f2..c179d17647982 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8208,10 +8208,25 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
unsigned BitWidth = VT.getScalarSizeInBits();
unsigned BestChunkWidth = 0;
- // We restrict to small chunk sizes (e.g., ≤ 32 bits) to ensure that all
- // operations remain legal on most targets.
- unsigned MaxChunk = 32;
- for (int i = MaxChunk; i >= 1; --i) {
+ // Determine the largest legal scalar integer type we can safely use
+ // for chunk operations.
+ unsigned MaxChunk = 0;
+
+ // Use the largest legal integer register type for this VT.
+ EVT LegalVT = EVT(getRegisterType(*DAG.getContext(), VT));
+ if (LegalVT.isInteger())
+ MaxChunk = LegalVT.getSizeInBits();
+ else
+ return false;
+
+ // Clamp to the original bit width.
+ MaxChunk = std::min(MaxChunk, BitWidth);
+
+ // Find the largest chunk width W in (MaxChunk/2, MaxChunk] satisfying
+ // (1 << W) % Divisor == 1.
+ // Then 2^W ≡ 1 (mod Divisor), so a value written in base 2^W can be
+ // reduced modulo Divisor by summing its W-bit chunks.
+ for (unsigned i = MaxChunk; i > MaxChunk / 2; --i) {
APInt ChunkMaxPlus1 = APInt::getOneBitSet(BitWidth, i);
if (ChunkMaxPlus1.urem(Divisor).isOne()) {
BestChunkWidth = i;
@@ -8223,10 +8238,13 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
if (BestChunkWidth > 0) {
EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
- if (!LL)
- std::tie(LL, LH) =
- DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
- SDValue In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ SDValue In;
+
+ if (LL) {
+ In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ } else {
+ In = N->getOperand(0);
+ }
SmallVector<SDValue, 8> Parts;
// Split into fixed-size chunks
@@ -8254,15 +8272,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
} else {
SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
-
- if (getBooleanContents(ChunkVT) ==
- TargetLoweringBase::ZeroOrOneBooleanContent)
- NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
- else
- NewCarry = DAG.getSelect(dl, ChunkVT, NewCarry,
- DAG.getConstant(1, dl, ChunkVT),
- DAG.getConstant(0, dl, ChunkVT));
-
+ NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry);
Carry = NewCarry;
}
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 171fa1e74ce54..9ae698115c281 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -502,13 +502,23 @@ entry:
define i128 @ui128_7(i128 %a, i128 %b) {
; CHECK-SD-LABEL: ui128_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: mov w2, #7 // =0x7
-; CHECK-SD-NEXT: mov x3, xzr
-; CHECK-SD-NEXT: bl __umodti3
-; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: extr x9, x1, x0, #63
+; CHECK-SD-NEXT: mov x8, #18725 // =0x4925
+; CHECK-SD-NEXT: and x11, x0, #0x7fffffffffffffff
+; CHECK-SD-NEXT: movk x8, #9362, lsl #16
+; CHECK-SD-NEXT: add x9, x0, x9
+; CHECK-SD-NEXT: movk x8, #37449, lsl #32
+; CHECK-SD-NEXT: add x10, x9, x1, lsr #62
+; CHECK-SD-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-SD-NEXT: movk x8, #18724, lsl #48
+; CHECK-SD-NEXT: cmp x9, x11
+; CHECK-SD-NEXT: mov x1, xzr
+; CHECK-SD-NEXT: cinc x9, x10, lo
+; CHECK-SD-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-SD-NEXT: umulh x8, x9, x8
+; CHECK-SD-NEXT: lsr x8, x8, #1
+; CHECK-SD-NEXT: sub x8, x8, x8, lsl #3
+; CHECK-SD-NEXT: add x0, x9, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ui128_7:
@@ -3089,34 +3099,36 @@ entry:
define <2 x i128> @uv2i128_7(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: uv2i128_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w19, -8
-; CHECK-SD-NEXT: .cfi_offset w20, -16
-; CHECK-SD-NEXT: .cfi_offset w21, -24
-; CHECK-SD-NEXT: .cfi_offset w22, -32
-; CHECK-SD-NEXT: .cfi_offset w30, -48
-; CHECK-SD-NEXT: mov x19, x3
-; CHECK-SD-NEXT: mov x20, x2
-; CHECK-SD-NEXT: mov w2, #7 // =0x7
-; CHECK-SD-NEXT: mov x3, xzr
-; CHECK-SD-NEXT: bl __umodti3
-; CHECK-SD-NEXT: mov x21, x0
-; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: mov x0, x20
-; CHECK-SD-NEXT: mov x1, x19
-; CHECK-SD-NEXT: mov w2, #7 // =0x7
+; CHECK-SD-NEXT: extr x9, x1, x0, #63
+; CHECK-SD-NEXT: extr x8, x3, x2, #63
+; CHECK-SD-NEXT: and x10, x0, #0x7fffffffffffffff
+; CHECK-SD-NEXT: and x12, x2, #0x7fffffffffffffff
+; CHECK-SD-NEXT: add x9, x0, x9
+; CHECK-SD-NEXT: add x8, x2, x8
+; CHECK-SD-NEXT: add x11, x9, x1, lsr #62
+; CHECK-SD-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-SD-NEXT: mov x1, xzr
+; CHECK-SD-NEXT: cmp x9, x10
+; CHECK-SD-NEXT: add x9, x8, x3, lsr #62
+; CHECK-SD-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-SD-NEXT: cinc x10, x11, lo
+; CHECK-SD-NEXT: mov x11, #18725 // =0x4925
+; CHECK-SD-NEXT: cmp x8, x12
+; CHECK-SD-NEXT: movk x11, #9362, lsl #16
+; CHECK-SD-NEXT: cinc x9, x9, lo
+; CHECK-SD-NEXT: and x8, x10, #0x7fffffffffffffff
+; CHECK-SD-NEXT: movk x11, #37449, lsl #32
+; CHECK-SD-NEXT: and x9, x9, #0x7fffffffffffffff
; CHECK-SD-NEXT: mov x3, xzr
-; CHECK-SD-NEXT: bl __umodti3
-; CHECK-SD-NEXT: mov x2, x0
-; CHECK-SD-NEXT: mov x3, x1
-; CHECK-SD-NEXT: mov x0, x21
-; CHECK-SD-NEXT: mov x1, x22
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK-SD-NEXT: movk x11, #18724, lsl #48
+; CHECK-SD-NEXT: umulh x10, x8, x11
+; CHECK-SD-NEXT: umulh x11, x9, x11
+; CHECK-SD-NEXT: lsr x10, x10, #1
+; CHECK-SD-NEXT: lsr x11, x11, #1
+; CHECK-SD-NEXT: sub x10, x10, x10, lsl #3
+; CHECK-SD-NEXT: sub x11, x11, x11, lsl #3
+; CHECK-SD-NEXT: add x0, x8, x10
+; CHECK-SD-NEXT: add x2, x9, x11
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i128_7:
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll
index 16865030cfc36..10b91e82f915a 100644
--- a/llvm/test/CodeGen/X86/divmod128.ll
+++ b/llvm/test/CodeGen/X86/divmod128.ll
@@ -67,32 +67,17 @@ define i64 @div128(i128 %x) nounwind {
define i64 @umod128(i128 %x) nounwind {
; X86-64-LABEL: umod128:
; X86-64: # %bb.0:
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: shldq $4, %rdi, %rax
-; X86-64-NEXT: movq %rdi, %rcx
-; X86-64-NEXT: shrq $30, %rcx
-; X86-64-NEXT: addl %edi, %ecx
-; X86-64-NEXT: movl %ecx, %edx
-; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
-; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF
-; X86-64-NEXT: cmpl %edi, %edx
-; X86-64-NEXT: movl %ecx, %edi
-; X86-64-NEXT: adcl %eax, %edi
-; X86-64-NEXT: addl %eax, %ecx
-; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: shrq $26, %rax
-; X86-64-NEXT: cmpl %edx, %ecx
-; X86-64-NEXT: movl %edi, %ecx
-; X86-64-NEXT: adcl %eax, %ecx
-; X86-64-NEXT: movl %edi, %edx
-; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
-; X86-64-NEXT: addl %eax, %edi
-; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF
+; X86-64-NEXT: movabsq $1152921504606846975, %rax # imm = 0xFFFFFFFFFFFFFFF
+; X86-64-NEXT: movq %rsi, %rcx
+; X86-64-NEXT: shldq $4, %rdi, %rcx
+; X86-64-NEXT: addq %rdi, %rcx
+; X86-64-NEXT: andq %rax, %rdi
+; X86-64-NEXT: movq %rcx, %rdx
+; X86-64-NEXT: andq %rax, %rdx
; X86-64-NEXT: shrq $56, %rsi
-; X86-64-NEXT: cmpl %edx, %edi
-; X86-64-NEXT: adcl %esi, %ecx
-; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; X86-64-NEXT: cmpq %rdi, %rdx
+; X86-64-NEXT: adcq %rsi, %rcx
+; X86-64-NEXT: andq %rax, %rcx
; X86-64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2
; X86-64-NEXT: movq %rcx, %rax
; X86-64-NEXT: mulq %rdx
@@ -104,39 +89,24 @@ define i64 @umod128(i128 %x) nounwind {
;
; WIN64-LABEL: umod128:
; WIN64: # %bb.0:
-; WIN64-NEXT: movq %rdx, %rax
-; WIN64-NEXT: shldq $4, %rcx, %rax
-; WIN64-NEXT: movq %rcx, %r8
-; WIN64-NEXT: shrq $30, %r8
-; WIN64-NEXT: addl %ecx, %r8d
-; WIN64-NEXT: movl %r8d, %r9d
-; WIN64-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
-; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; WIN64-NEXT: cmpl %ecx, %r9d
-; WIN64-NEXT: movl %r8d, %r10d
-; WIN64-NEXT: adcl %eax, %r10d
-; WIN64-NEXT: addl %eax, %r8d
-; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
-; WIN64-NEXT: movq %rdx, %rax
-; WIN64-NEXT: shrq $26, %rax
-; WIN64-NEXT: cmpl %r9d, %r8d
-; WIN64-NEXT: movl %r10d, %ecx
-; WIN64-NEXT: adcl %eax, %ecx
-; WIN64-NEXT: movl %r10d, %r8d
-; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
-; WIN64-NEXT: addl %eax, %r10d
-; WIN64-NEXT: andl $1073741823, %r10d # imm = 0x3FFFFFFF
+; WIN64-NEXT: movabsq $1152921504606846975, %rax # imm = 0xFFFFFFFFFFFFFFF
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: shldq $4, %rcx, %r8
+; WIN64-NEXT: addq %rcx, %r8
+; WIN64-NEXT: andq %rax, %rcx
+; WIN64-NEXT: movq %r8, %r9
+; WIN64-NEXT: andq %rax, %r9
; WIN64-NEXT: shrq $56, %rdx
-; WIN64-NEXT: cmpl %r8d, %r10d
-; WIN64-NEXT: adcl %edx, %ecx
-; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; WIN64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2
-; WIN64-NEXT: movq %rcx, %rax
-; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: cmpq %rcx, %r9
+; WIN64-NEXT: adcq %rdx, %r8
+; WIN64-NEXT: andq %rax, %r8
+; WIN64-NEXT: movabsq $1676976733973595602, %rcx # imm = 0x1745D1745D1745D2
+; WIN64-NEXT: movq %r8, %rax
+; WIN64-NEXT: mulq %rcx
; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
; WIN64-NEXT: leaq (%rdx,%rax,2), %rax
-; WIN64-NEXT: subq %rax, %rcx
-; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: subq %rax, %r8
+; WIN64-NEXT: movq %r8, %rax
; WIN64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/uint128-div-const.ll b/llvm/test/CodeGen/X86/uint128-div-const.ll
index 60dddad952679..952b98af6adea 100644
--- a/llvm/test/CodeGen/X86/uint128-div-const.ll
+++ b/llvm/test/CodeGen/X86/uint128-div-const.ll
@@ -4,38 +4,24 @@
define i128 @div_by_7(i128 %x) {
; CHECK-LABEL: div_by_7:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shldq $4, %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: shrq $30, %rcx
-; CHECK-NEXT: addl %edi, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
-; CHECK-NEXT: movl %edi, %r8d
-; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
-; CHECK-NEXT: cmpl %r8d, %edx
-; CHECK-NEXT: movl %ecx, %r8d
-; CHECK-NEXT: adcl %eax, %r8d
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq $26, %rax
-; CHECK-NEXT: cmpl %edx, %ecx
-; CHECK-NEXT: movl %r8d, %edx
-; CHECK-NEXT: adcl %eax, %edx
-; CHECK-NEXT: movl %r8d, %r9d
-; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
-; CHECK-NEXT: addl %eax, %r8d
-; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: movq %rdi, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: movq %rsi, %r8
+; CHECK-NEXT: shldq $1, %rdi, %r8
+; CHECK-NEXT: addq %rdi, %r8
+; CHECK-NEXT: movq %r8, %r9
+; CHECK-NEXT: andq %rax, %r9
; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: shrq $56, %rcx
-; CHECK-NEXT: cmpl %r9d, %r8d
-; CHECK-NEXT: adcl %edx, %ecx
-; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; CHECK-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493
+; CHECK-NEXT: shrq $62, %rcx
+; CHECK-NEXT: cmpq %rdx, %r9
+; CHECK-NEXT: adcq %r8, %rcx
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movabsq $5270498306774157605, %rdx # imm = 0x4924924924924925
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: mulq %rdx
-; CHECK-NEXT: leal (,%rdx,8), %eax
+; CHECK-NEXT: shrq %rdx
+; CHECK-NEXT: leaq (,%rdx,8), %rax
; CHECK-NEXT: subq %rax, %rdx
; CHECK-NEXT: addq %rcx, %rdx
; CHECK-NEXT: subq %rdx, %rdi
@@ -57,34 +43,19 @@ entry:
define i128 @div_by_9(i128 %x) {
; CHECK-LABEL: div_by_9:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shldq $4, %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: shrq $30, %rcx
-; CHECK-NEXT: addl %edi, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
-; CHECK-NEXT: movl %edi, %r8d
-; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
-; CHECK-NEXT: cmpl %r8d, %edx
-; CHECK-NEXT: movl %ecx, %r8d
-; CHECK-NEXT: adcl %eax, %r8d
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq $26, %rax
-; CHECK-NEXT: cmpl %edx, %ecx
-; CHECK-NEXT: movl %r8d, %edx
-; CHECK-NEXT: adcl %eax, %edx
-; CHECK-NEXT: movl %r8d, %r9d
-; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF
-; CHECK-NEXT: addl %eax, %r8d
-; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF
+; CHECK-NEXT: movabsq $1152921504606846975, %rax # imm = 0xFFFFFFFFFFFFFFF
+; CHECK-NEXT: movq %rdi, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: movq %rsi, %r8
+; CHECK-NEXT: shldq $4, %rdi, %r8
+; CHECK-NEXT: addq %rdi, %r8
+; CHECK-NEXT: movq %r8, %r9
+; CHECK-NEXT: andq %rax, %r9
; CHECK-NEXT: movq %rsi, %rcx
; CHECK-NEXT: shrq $56, %rcx
-; CHECK-NEXT: cmpl %r9d, %r8d
-; CHECK-NEXT: adcl %edx, %ecx
-; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF
+; CHECK-NEXT: cmpq %rdx, %r9
+; CHECK-NEXT: adcq %r8, %rcx
+; CHECK-NEXT: andq %rax, %rcx
; CHECK-NEXT: movabsq $2049638230412172402, %rdx # imm = 0x1C71C71C71C71C72
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: mulq %rdx
@@ -109,52 +80,19 @@ entry:
define i128 @div_by_25(i128 %x) {
; CHECK-LABEL: div_by_25:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shldq $24, %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: shrq $20, %rcx
-; CHECK-NEXT: addl %edi, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
-; CHECK-NEXT: movl %edi, %r8d
-; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
-; CHECK-NEXT: cmpl %r8d, %edx
-; CHECK-NEXT: movl %ecx, %r8d
-; CHECK-NEXT: adcl %eax, %r8d
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shldq $4, %rdi, %rax
-; CHECK-NEXT: cmpl %edx, %ecx
-; CHECK-NEXT: movl %r8d, %ecx
-; CHECK-NEXT: adcl %eax, %ecx
-; CHECK-NEXT: movl %r8d, %edx
-; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
-; CHECK-NEXT: addl %eax, %r8d
-; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq $16, %rax
-; CHECK-NEXT: cmpl %edx, %r8d
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: adcl %eax, %edx
-; CHECK-NEXT: movl %ecx, %r8d
-; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq $36, %rax
-; CHECK-NEXT: cmpl %r8d, %ecx
-; CHECK-NEXT: movl %edx, %r8d
-; CHECK-NEXT: adcl %eax, %r8d
-; CHECK-NEXT: movl %edx, %r9d
-; CHECK-NEXT: andl $1048575, %r9d # imm = 0xFFFFF
-; CHECK-NEXT: addl %eax, %edx
-; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF
+; CHECK-NEXT: movabsq $1152921504606846975, %rax # imm = 0xFFFFFFFFFFFFFFF
+; CHECK-NEXT: movq %rdi, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: movq %rsi, %r8
+; CHECK-NEXT: shldq $4, %rdi, %r8
+; CHECK-NEXT: addq %rdi, %r8
+; CHECK-NEXT: movq %r8, %r9
+; CHECK-NEXT: andq %rax, %r9
; CHECK-NEXT: movq %rsi, %rcx
; CHECK-NEXT: shrq $56, %rcx
-; CHECK-NEXT: cmpl %r9d, %edx
-; CHECK-NEXT: adcl %r8d, %ecx
-; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF
+; CHECK-NEXT: cmpq %rdx, %r9
+; CHECK-NEXT: adcq %r8, %rcx
+; CHECK-NEXT: andq %rax, %rcx
; CHECK-NEXT: movabsq $737869762948382065, %rdx # imm = 0xA3D70A3D70A3D71
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: mulq %rdx
>From 3f4c8c25e503289190bac563622e908cd89c0adf Mon Sep 17 00:00:00 2001
From: Shivam Gupta <shivam98.tkg at gmail.com>
Date: Sun, 1 Mar 2026 19:59:50 +0530
Subject: [PATCH 3/4] minor style fix
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++++++++-----------
1 file changed, 11 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c179d17647982..c9a9703a6feff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8210,9 +8210,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// Determine the largest legal scalar integer type we can safely use
// for chunk operations.
- unsigned MaxChunk = 0;
-
- // Use the largest legal integer register type for this VT.
+ unsigned MaxChunk;
EVT LegalVT = EVT(getRegisterType(*DAG.getContext(), VT));
if (LegalVT.isInteger())
MaxChunk = LegalVT.getSizeInBits();
@@ -8235,16 +8233,18 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
}
// If we found a good chunk width, slice the number and sum the pieces.
- if (BestChunkWidth > 0) {
- EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
+ if (!BestChunkWidth)
+ return false;
- SDValue In;
+ EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
- if (LL) {
- In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
- } else {
- In = N->getOperand(0);
- }
+ SDValue In;
+
+ if (LL) {
+ In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ } else {
+ In = N->getOperand(0);
+ }
SmallVector<SDValue, 8> Parts;
// Split into fixed-size chunks
@@ -8279,9 +8279,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
}
Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum);
- } else {
- return false;
- }
}
// If we didn't find a sum, we can't do the expansion.
>From b269c09ebf620fa77ccc2574508b6f23a305ec17 Mon Sep 17 00:00:00 2001
From: Shivam Gupta <shivam98.tkg at gmail.com>
Date: Sun, 1 Mar 2026 20:08:44 +0530
Subject: [PATCH 4/4] some minor formatting
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 62 +++++++++----------
1 file changed, 31 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c9a9703a6feff..9cc93fd333f5b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8246,39 +8246,39 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
In = N->getOperand(0);
}
- SmallVector<SDValue, 8> Parts;
- // Split into fixed-size chunks
- for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) {
- SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl);
- SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
- Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk);
- Parts.push_back(Chunk);
- }
- if (Parts.empty())
- return false;
- Sum = Parts[0];
-
- // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
- // same logic as used in above if condition.
- SDValue Carry = DAG.getConstant(0, dl, ChunkVT);
- EVT SetCCType =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT);
- for (unsigned i = 1; i < Parts.size(); ++i) {
- if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) {
- SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType);
- SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]);
- Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Carry,
- UAdd.getValue(1));
- } else {
- SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
- SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
- NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
- Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry);
- Carry = NewCarry;
- }
+ SmallVector<SDValue, 8> Parts;
+ // Split into fixed-size chunks
+ for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) {
+ SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl);
+ SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
+ Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk);
+ Parts.push_back(Chunk);
+ }
+ if (Parts.empty())
+ return false;
+ Sum = Parts[0];
+
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
+ // same logic as used in above if condition.
+ SDValue Carry = DAG.getConstant(0, dl, ChunkVT);
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT);
+ for (unsigned i = 1; i < Parts.size(); ++i) {
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) {
+ SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType);
+ SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]);
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Carry,
+ UAdd.getValue(1));
+ } else {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
+ SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
+ NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
+ Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry);
+ Carry = NewCarry;
}
+ }
- Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum);
+ Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum);
}
// If we didn't find a sum, we can't do the expansion.
More information about the llvm-commits
mailing list