[llvm] [AggressiveInstCombine] Recognize table based log2 and replace with ctlz+sub. (PR #185160)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 09:29:49 PDT 2026
================
@@ -626,6 +626,205 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
return true;
}
+// Check if this array of constants represents a log2 table.
+// Iterate over the elements from \p Table by trying to find/match all
+// the numbers from 0 to \p InputBits that should represent log2 results.
+static bool isLog2Table(Constant *Table, const APInt &Mul, const APInt &Shift,
+ const APInt &AndMask, Type *AccessTy,
+ unsigned InputBits, const APInt &GEPIdxFactor,
+ const DataLayout &DL) {
+ for (unsigned Idx = 0; Idx < InputBits; Idx++) {
+ APInt Index =
+ (APInt::getLowBitsSet(InputBits, Idx + 1) * Mul).lshr(Shift) & AndMask;
+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(
+ ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
+ if (!C || C->getValue() != Idx)
+ return false;
+ }
+
+ // Verify that an input of zero will select table index 0.
+ APInt ZeroIndex = Mul.lshr(Shift) & AndMask;
+ if (!ZeroIndex.isZero())
+ return false;
+
+ return true;
+}
+
+// Try to recognize table-based log2 implementation.
+// E.g., an exmapel in C (for more cases please the llvm/tests):
+// int f(unsigned x) {
+// static const char table[32] =
+// {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+// 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
+//
+// v |= v >> 1; // first round down to one less than a power of 2
+// v |= v >> 2;
+// v |= v >> 4;
+// v |= v >> 8;
+// v |= v >> 16;
+//
+// return table[(unsigned)(v * 0x07C4ACDDU) >> 27];
+// }
+// this can be lowered to `ctlz` instruction.
+// There is also a special case when the element is 0.
+//
+// The >> and |= sequence sets all bits below the most significant set bit. The
+// multiply is a de-bruijn sequence that contains each pattern of bits in it.
+// The shift extracts the top bits after the multiply, and that index into the
+// table should represent the floor log base 2 of the original number.
+//
+// Here are some examples of LLVM IR for a 64-bit target.
+//
+// CASE 1:
+// %shr = lshr i32 %v, 1
+// %or = or i32 %shr, %v
+// %shr1 = lshr i32 %or, 2
+// %or2 = or i32 %shr1, %or
+// %shr3 = lshr i32 %or2, 4
+// %or4 = or i32 %shr3, %or2
+// %shr5 = lshr i32 %or4, 8
+// %or6 = or i32 %shr5, %or4
+// %shr7 = lshr i32 %or6, 16
+// %or8 = or i32 %shr7, %or6
+// %mul = mul i32 %or8, 130329821
+// %shr9 = lshr i32 %mul, 27
+// %idxprom = zext nneg i32 %shr9 to i64
+// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %idxprom
+// %0 = load i8, ptr %arrayidx, align 1
+//
+// CASE 2:
+// %shr = lshr i64 %v, 1
+// %or = or i64 %shr, %v
+// %shr1 = lshr i64 %or, 2
+// %or2 = or i64 %shr1, %or
+// %shr3 = lshr i64 %or2, 4
+// %or4 = or i64 %shr3, %or2
+// %shr5 = lshr i64 %or4, 8
+// %or6 = or i64 %shr5, %or4
+// %shr7 = lshr i64 %or6, 16
+// %or8 = or i64 %shr7, %or6
+// %shr9 = lshr i64 %or8, 32
+// %or10 = or i64 %shr9, %or8
+// %mul = mul i64 %or10, 285870213051386505
+// %shr11 = lshr i64 %mul, 58
+// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %shr11
+// %0 = load i8, ptr %arrayidx, align 1/
+//
+// All these can be lowered to @llvm.cttz.i32/64 intrinsics and a subtract.
----------------
davemgreen wrote:
cttz -> ctlz?
https://github.com/llvm/llvm-project/pull/185160
More information about the llvm-commits
mailing list