[llvm] [CodeGen] Emit a more efficient magic number multiplication for exact udivs (PR #87161)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 02:06:34 PDT 2024
================
@@ -5071,8 +5071,36 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
auto &MIB = Builder;
+ bool UseSRL = false;
bool UseNPQ = false;
SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+ SmallVector<Register, 16> Shifts, Factors;
+ auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
+ bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
+
+ auto BuildExactUDIVPattern = [&](const Constant *C) {
+ // Don't recompute inverses for each splat element.
+ if (IsSplat && !Factors.empty()) {
+ Shifts.push_back(Shifts[0]);
+ Factors.push_back(Factors[0]);
+ return true;
+ }
+
+ auto *CI = cast<ConstantInt>(C);
+ APInt Divisor = CI->getValue();
+ unsigned Shift = Divisor.countr_zero();
+ if (Shift) {
+ Divisor.lshrInPlace(Shift);
+ UseSRL = true;
+ }
+
+ // Calculate the multiplicative inverse modulo BW.
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
----------------
jayfoad wrote:
Remove the comment about W + 1 bits.
https://github.com/llvm/llvm-project/pull/87161
More information about the llvm-commits mailing list