[llvm] [LegalizeTypes] Expand 128-bit UDIV/UREM by constant via Chunk Addition (PR #146238)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 17:44:36 PDT 2025
================
@@ -8030,6 +8028,80 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getConstant(0, dl, HiLoVT));
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
}
+
+ } else {
+ // If we cannot split in two halves, look for a smaller chunk
+ // width where (1 << ChunkWidth) mod Divisor == 1.
+ // This ensures that the sum of all such chunks modulo Divisor
+ // is equivalent to the original value modulo Divisor.
+ const APInt &Divisor = CN->getAPIntValue();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned BestChunkWidth = 0;
+
+ // We restrict to small chunk sizes (e.g., ≤ 32 bits) to ensure that all
+ // operations remain legal on most targets.
+ unsigned MaxChunk = 32;
+ for (int i = MaxChunk; i >= 1; --i) {
+ APInt ChunkMaxPlus1 = APInt::getOneBitSet(BitWidth, i);
+ if (ChunkMaxPlus1.urem(Divisor).isOne()) {
+ BestChunkWidth = i;
+ break;
+ }
+ }
+
+ // If we found a good chunk width, slice the number and sum the pieces.
+ if (BestChunkWidth > 0) {
+ EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
+
+ if (!LL)
+ std::tie(LL, LH) =
+ DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
+ SDValue In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+
+ SmallVector<SDValue, 8> Parts;
+ // Split into fixed-size chunks
+ for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) {
+ SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl);
+ SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
+ Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk);
+ Parts.push_back(Chunk);
+ }
+ if (Parts.empty())
+ return false;
+ Sum = Parts[0];
+
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
+ // This is the same logic as in the if-branch above.
+ SDValue Carry = DAG.getConstant(0, dl, ChunkVT);
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT);
+ for (unsigned i = 1; i < Parts.size(); ++i) {
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) {
+ SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType);
+ SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]);
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Carry,
+ UAdd.getValue(1));
+ } else {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
+ SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
+
+ if (getBooleanContents(ChunkVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT);
+ else
+ NewCarry = DAG.getSelect(dl, ChunkVT, NewCarry,
+ DAG.getConstant(1, dl, ChunkVT),
+ DAG.getConstant(0, dl, ChunkVT));
----------------
arsenm wrote:
You're doing the zext in either case, so just do the zext. It doesn't depend on the boolean contents.
https://github.com/llvm/llvm-project/pull/146238
More information about the llvm-commits
mailing list