[llvm] 69780be - [AggressiveInstCombine] Recognize table based log2 and replace with ctlz+sub. (#185160)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 14 17:39:02 PDT 2026


Author: Craig Topper
Date: 2026-03-14T17:38:56-07:00
New Revision: 69780be1d42a5fd218e36e854e72a5532ac7b502

URL: https://github.com/llvm/llvm-project/commit/69780be1d42a5fd218e36e854e72a5532ac7b502
DIFF: https://github.com/llvm/llvm-project/commit/69780be1d42a5fd218e36e854e72a5532ac7b502.diff

LOG: [AggressiveInstCombine] Recognize table based log2 and replace with ctlz+sub. (#185160)

Recognize table based log2 implementations like

```
unsigned log2(unsigned v) {
  static const unsigned char table[] = {
    0,  9,  1, 10, 13, 21,  2, 29, 11, 14, 16, 18, 22, 25,  3, 30,
    8, 12, 20, 28, 15, 17, 24,  7, 19, 27, 23,  6, 26,  5,  4, 31
  };

  v |= v >> 1;
  v |= v >> 2;
  v |= v >> 4;
  v |= v >> 8;
  v |= v >> 16;

  return table[(unsigned)(v * 0x07C4ACDDU) >> 27];
}
```

and replaces with 31 - llvm.ctlz(v).

Similar for i64 log2. Other sizes can be supported with correct multiply
constant and table values, but I have not found examples yet.

This code is based on the existing tryToRecognizeTableBasedCttz. Like
that function, we support
any combination of multiply constant and table values that produce the
correct result.

It handles the same pattern as #177110, but does not match the outer
subtract from that patch. It is assumed that InstCombine or other
optimizations can combine (sub 31 (sub 31, cttz V)) later.

I have limited this to targets that have a fast ctlz. The backend does
not yet have a table based lowering for ctlz so this reduces the chance
of regressions.

Added: 
    llvm/test/Transforms/AggressiveInstCombine/AArch64/lower-table-based-log2-basics.ll
    llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-basics.ll
    llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-negative.ll

Modified: 
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index cf63ce66a606a..c53435db356bd 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -640,6 +640,189 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
   return true;
 }
 
+// Check if this array of constants represents a log2 table.
+// Iterate over the elements from \p Table by trying to find/match all
+// the numbers from 0 to \p InputBits that should represent log2 results.
+static bool isLog2Table(Constant *Table, const APInt &Mul, const APInt &Shift,
+                        Type *AccessTy, unsigned InputBits,
+                        const APInt &GEPIdxFactor, const DataLayout &DL) {
+  for (unsigned Idx = 0; Idx < InputBits; Idx++) {
+    APInt Index = (APInt::getLowBitsSet(InputBits, Idx + 1) * Mul).lshr(Shift);
+    ConstantInt *C = dyn_cast_or_null<ConstantInt>(
+        ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
+    if (!C || C->getValue() != Idx)
+      return false;
+  }
+
+  // Verify that an input of zero will select table index 0.
+  APInt ZeroIndex = Mul.lshr(Shift);
+  if (!ZeroIndex.isZero())
+    return false;
+
+  return true;
+}
+
+// Try to recognize table-based log2 implementation.
+// E.g., an example in C (for more cases please see the llvm/tests):
+// int f(unsigned v) {
+//    static const char table[32] =
+//    {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+//     8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
+//
+//    v |= v >> 1; // first round down to one less than a power of 2
+//    v |= v >> 2;
+//    v |= v >> 4;
+//    v |= v >> 8;
+//    v |= v >> 16;
+//
+//    return table[(unsigned)(v * 0x07C4ACDDU) >> 27];
+// }
+// this can be lowered to `ctlz` instruction.
+// There is also a special case when the element is 0.
+//
+// The >> and |= sequence sets all bits below the most significant set bit. The
+// multiply is a de-bruijn sequence that contains each pattern of bits in it.
+// The shift extracts the top bits after the multiply, and that index into the
+// table should represent the floor log base 2 of the original number.
+//
+// Here are some examples of LLVM IR for a 64-bit target.
+//
+// CASE 1:
+// %shr = lshr i32 %v, 1
+// %or = or i32 %shr, %v
+// %shr1 = lshr i32 %or, 2
+// %or2 = or i32 %shr1, %or
+// %shr3 = lshr i32 %or2, 4
+// %or4 = or i32 %shr3, %or2
+// %shr5 = lshr i32 %or4, 8
+// %or6 = or i32 %shr5, %or4
+// %shr7 = lshr i32 %or6, 16
+// %or8 = or i32 %shr7, %or6
+// %mul = mul i32 %or8, 130329821
+// %shr9 = lshr i32 %mul, 27
+// %idxprom = zext nneg i32 %shr9 to i64
+// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %idxprom
+// %0 = load i8, ptr %arrayidx, align 1
+//
+// CASE 2:
+// %shr = lshr i64 %v, 1
+// %or = or i64 %shr, %v
+// %shr1 = lshr i64 %or, 2
+// %or2 = or i64 %shr1, %or
+// %shr3 = lshr i64 %or2, 4
+// %or4 = or i64 %shr3, %or2
+// %shr5 = lshr i64 %or4, 8
+// %or6 = or i64 %shr5, %or4
+// %shr7 = lshr i64 %or6, 16
+// %or8 = or i64 %shr7, %or6
+// %shr9 = lshr i64 %or8, 32
+// %or10 = or i64 %shr9, %or8
+// %mul = mul i64 %or10, 285870213051386505
+// %shr11 = lshr i64 %mul, 58
+// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %shr11
+// %0 = load i8, ptr %arrayidx, align 1
+//
+// All these can be lowered to @llvm.ctlz.i32/64 intrinsics and a subtract.
+static bool tryToRecognizeTableBasedLog2(Instruction &I, const DataLayout &DL,
+                                         TargetTransformInfo &TTI) {
+  LoadInst *LI = dyn_cast<LoadInst>(&I);
+  if (!LI)
+    return false;
+
+  Type *AccessType = LI->getType();
+  if (!AccessType->isIntegerTy())
+    return false;
+
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
+  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
+    return false;
+
+  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
+    return false;
+
+  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+  APInt ModOffset(BW, 0);
+  SmallMapVector<Value *, APInt, 4> VarOffsets;
+  if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
+      VarOffsets.size() != 1 || ModOffset != 0)
+    return false;
+  auto [GepIdx, GEPScale] = VarOffsets.front();
+
+  Value *X;
+  const APInt *MulConst, *ShiftConst;
+  // Check that the gep variable index is (x * MulConst) >> ShiftConst.
+  auto MatchInner =
+      m_LShr(m_Mul(m_Value(X), m_APInt(MulConst)), m_APInt(ShiftConst));
+  if (!match(GepIdx, m_CastOrSelf(MatchInner)))
+    return false;
+
+  unsigned InputBits = X->getType()->getScalarSizeInBits();
+  if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
+    return false;
+
+  // Verify shift amount.
+  // TODO: Allow other shift amounts when we have proper test coverage.
+  if (*ShiftConst != InputBits - Log2_32(InputBits))
+    return false;
+
+  // Match the sequence of OR operations with right shifts by powers of 2.
+  for (unsigned ShiftAmt = InputBits / 2; ShiftAmt != 0; ShiftAmt /= 2) {
+    Value *Y;
+    if (!match(X, m_c_Or(m_LShr(m_Value(Y), m_SpecificInt(ShiftAmt)),
+                         m_Deferred(Y))))
+      return false;
+    X = Y;
+  }
+
+  if (!GEPScale.isIntN(InputBits) ||
+      !isLog2Table(GVTable->getInitializer(), *MulConst, *ShiftConst,
+                   AccessType, InputBits, GEPScale.zextOrTrunc(InputBits), DL))
+    return false;
+
+  ConstantInt *ZeroTableElem = cast<ConstantInt>(
+      ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
+
+  // Use InputBits - 1 - ctlz(X) to compute log2(X).
+  IRBuilder<> B(LI);
+  ConstantInt *BoolConst = B.getTrue();
+  Type *XType = X->getType();
+
+  // Check that the backend has an efficient ctlz instruction.
+  // FIXME: Teach the backend to emit the original code when ctlz isn't
+  // supported like we do for cttz.
+  IntrinsicCostAttributes Attrs(
+      Intrinsic::ctlz, XType,
+      {PoisonValue::get(XType), /*is_zero_poison=*/BoolConst});
+  InstructionCost Cost =
+      TTI.getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+  if (Cost > TargetTransformInfo::TCC_Basic)
+    return false;
+
+  Value *Ctlz = B.CreateIntrinsic(Intrinsic::ctlz, {XType}, {X, BoolConst});
+
+  Constant *InputBitsM1 = ConstantInt::get(XType, InputBits - 1);
+  Value *Sub = B.CreateSub(InputBitsM1, Ctlz);
+
+  // The table won't produce a sensible result for 0.
+  Value *Cmp = B.CreateICmpEQ(X, ConstantInt::get(XType, 0));
+  Value *Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Sub);
+
+  // The true branch of select handles the log2(0) case, which is rare.
+  if (!ProfcheckDisableMetadataFixes) {
+    if (Instruction *SelectI = dyn_cast<Instruction>(Select))
+      SelectI->setMetadata(
+          LLVMContext::MD_prof,
+          MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
+  }
+
+  Value *ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
+
+  LI->replaceAllUsesWith(ZExtOrTrunc);
+
+  return true;
+}
+
 /// This is used by foldLoadsRecursive() to capture a Root Load node which is
 /// of type or(load, load) and recursively build the wide load. Also capture the
 /// shift amount, zero extend type and loadSize.
@@ -1842,6 +2025,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= tryToRecognizePopCount(I);
       MadeChange |= tryToFPToSat(I, TTI);
       MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
+      MadeChange |= tryToRecognizeTableBasedLog2(I, DL, TTI);
       MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
       MadeChange |= foldPatternedLoads(I, DL);
       MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/lower-table-based-log2-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/lower-table-based-log2-basics.ll
new file mode 100644
index 0000000000000..4585af5dcf314
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/lower-table-based-log2-basics.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=aggressive-instcombine -mtriple=aarch64 -S < %s | FileCheck %s
+
+;; These cases test lowering of various implementations of table-based log2
+;; algorithms to the llvm.ctlz instruction.
+
+;; C reproducers:
+;; int log2(unsigned v) {
+;;   static const unsigned char table[] = {
+;;     0,  9,  1, 10, 13, 21,  2, 29, 11, 14, 16, 18, 22, 25,  3, 30,
+;;     8, 12, 20, 28, 15, 17, 24,  7, 19, 27, 23,  6, 26,  5,  4, 31
+;;   };
+;;
+;;   v |= v >> 1;
+;;   v |= v >> 2;
+;;   v |= v >> 4;
+;;   v |= v >> 8;
+;;   v |= v >> 16;
+;;
+;;   return table[(unsigned)(v * 0x07C4ACDDU) >> 27];
+;; }
+;; int log2_64(unsigned long long v) {
+;;   static const unsigned char table[] = {
+;;      0, 47,  1, 56, 48, 27,  2, 60, 57, 49, 41, 37, 28, 16,  3, 61,
+;;     54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11,  4, 62,
+;;     46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
+;;     25, 39, 14, 33, 19, 30,  9, 24, 13, 18,  8, 12,  7,  6,  5, 63
+;;   };
+;;
+;;   v |= v >> 1;
+;;   v |= v >> 2;
+;;   v |= v >> 4;
+;;   v |= v >> 8;
+;;   v |= v >> 16;
+;;   v |= v >> 32;
+;;
+;;   return table[(v * 0x03F79D71B4CB0A89ULL) >> 58];
+;; }
+
+@log2.table = internal unnamed_addr constant [32 x i8] c"\00\09\01\0A\0D\15\02\1D\0B\0E\10\12\16\19\03\1E\08\0C\14\1C\0F\11\18\07\13\1B\17\06\1A\05\04\1F", align 1
+
+define i32 @log2_32(i32 %v) !prof !0 {
+; CHECK-LABEL: @log2_32(
+; CHECK: !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 31, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @log2_32_nusw(i32 %v) {
+; CHECK-LABEL: @log2_32_nusw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 31, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr nusw [32 x i8], ptr @log2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+@log2_64.table = internal unnamed_addr constant [64 x i8] c"\00/\0180\1B\02<91)%\1C\10\03=6:#42*\15,& \1D\17\11\0B\04>.7\1A;($\0F5\223\14+\1F\16\0A-\19'\0E!\13\1E\09\18\0D\12\08\0C\07\06\05?", align 1
+
+define i32 @log2_64(i64 noundef %v) {
+; CHECK-LABEL: @log2_64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 63, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP1]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i64 %v, 1
+  %or = or i64 %shr, %v
+  %shr1 = lshr i64 %or, 2
+  %or2 = or i64 %shr1, %or
+  %shr3 = lshr i64 %or2, 4
+  %or4 = or i64 %shr3, %or2
+  %shr5 = lshr i64 %or4, 8
+  %or6 = or i64 %shr5, %or4
+  %shr7 = lshr i64 %or6, 16
+  %or8 = or i64 %shr7, %or6
+  %shr9 = lshr i64 %or8, 32
+  %or10 = or i64 %shr9, %or8
+  %mul = mul i64 %or10, 285870213051386505
+  %shr11 = lshr i64 %mul, 58
+  %arrayidx = getelementptr inbounds i8, ptr @log2_64.table, i64 %shr11
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+@log2_16.table = internal unnamed_addr constant [16 x i8] c"\00\07\01\0D\08\0A\02\0E\06\0C\09\05\0B\04\03\0F", align 1
+
+define i32 @log2_16(i16 noundef %0) {
+; CHECK-LABEL: @log2_16(
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[TMP0:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i16 15, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i16 0, i16 [[TMP3]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i16 [[TMP5]] to i8
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
+; CHECK-NEXT:    ret i32 [[TMP15]]
+;
+  %2 = lshr i16 %0, 1
+  %3 = or i16 %2, %0
+  %4 = lshr i16 %3, 2
+  %5 = or i16 %4, %3
+  %6 = lshr i16 %5, 4
+  %7 = or i16 %6, %5
+  %8 = lshr i16 %7, 8
+  %9 = or i16 %8, %7
+  %10 = mul i16 %9, 3885
+  %11 = lshr i16 %10, 12
+  %12 = zext nneg i16 %11 to i64
+  %13 = getelementptr inbounds nuw i8, ptr @log2_16.table, i64 %12
+  %14 = load i8, ptr %13, align 1
+  %15 = zext i8 %14 to i32
+  ret i32 %15
+}
+
+
+@log2_128.table = internal unnamed_addr constant [128 x i8] c"\00\0D\01\0E\13&\02\0F\1A\14A!'H\03\10\1E\1B9\15.B[\225(c<Ij\04}\11\18\1F\1C,3:\161/NCPU\\#E6g)Rdp=WJs^kw\05~\0C\12%\19@ G\1D8-Z4b;i|\17+20MOTDfQoVr]v\0B$?F7Yah{*LSenqu\0A>X`zKmt\09_yl\08x\07\06\7F", align 1
+
+define i32 @log2_128(i128 noundef %0) {
+; CHECK-LABEL: @log2_128(
+; CHECK-NEXT:    [[TMP2:%.*]] = call i128 @llvm.ctlz.i128(i128 [[TMP0:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i128 127, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i128 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i128 0, i128 [[TMP3]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP20:%.*]] = trunc i128 [[TMP5]] to i8
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i8 [[TMP20]] to i32
+; CHECK-NEXT:    ret i32 [[TMP21]]
+;
+  %2 = lshr i128 %0, 1
+  %3 = or i128 %2, %0
+  %4 = lshr i128 %3, 2
+  %5 = or i128 %4, %3
+  %6 = lshr i128 %5, 4
+  %7 = or i128 %6, %5
+  %8 = lshr i128 %7, 8
+  %9 = or i128 %8, %7
+  %10 = lshr i128 %9, 16
+  %11 = or i128 %10, %9
+  %12 = lshr i128 %11, 32
+  %13 = or i128 %12, %11
+  %14 = lshr i128 %13, 64
+  %15 = or i128 %14, %13
+  %16 = mul i128 %15, 2638024179347461332462726661865453437
+  %17 = lshr i128 %16, 121
+  %18 = trunc nuw nsw i128 %17 to i64
+  %19 = getelementptr inbounds nuw i8, ptr @log2_128.table, i64 %18
+  %20 = load i8, ptr %19, align 1
+  %21 = zext i8 %20 to i32
+  ret i32 %21
+}
+
+!0 = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-basics.ll
new file mode 100644
index 0000000000000..2b340faab1300
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-basics.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=aggressive-instcombine -mtriple=x86_64 -mattr=+lzcnt -S < %s | FileCheck %s
+
+;; These cases test lowering of various implementations of table-based log2
+;; algorithms to the llvm.ctlz instruction.
+
+;; C reproducers:
+;; int log2(unsigned v) {
+;;   static const unsigned char table[] = {
+;;     0,  9,  1, 10, 13, 21,  2, 29, 11, 14, 16, 18, 22, 25,  3, 30,
+;;     8, 12, 20, 28, 15, 17, 24,  7, 19, 27, 23,  6, 26,  5,  4, 31
+;;   };
+;;
+;;   v |= v >> 1;
+;;   v |= v >> 2;
+;;   v |= v >> 4;
+;;   v |= v >> 8;
+;;   v |= v >> 16;
+;;
+;;   return table[(unsigned)(v * 0x07C4ACDDU) >> 27];
+;; }
+;; int log2_64(unsigned long long v) {
+;;   static const unsigned char table[] = {
+;;      0, 47,  1, 56, 48, 27,  2, 60, 57, 49, 41, 37, 28, 16,  3, 61,
+;;     54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11,  4, 62,
+;;     46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
+;;     25, 39, 14, 33, 19, 30,  9, 24, 13, 18,  8, 12,  7,  6,  5, 63
+;;   };
+;;
+;;   v |= v >> 1;
+;;   v |= v >> 2;
+;;   v |= v >> 4;
+;;   v |= v >> 8;
+;;   v |= v >> 16;
+;;   v |= v >> 32;
+;;
+;;   return table[(v * 0x03F79D71B4CB0A89ULL) >> 58];
+;; }
+
+@log2.table = internal unnamed_addr constant [32 x i8] c"\00\09\01\0A\0D\15\02\1D\0B\0E\10\12\16\19\03\1E\08\0C\14\1C\0F\11\18\07\13\1B\17\06\1A\05\04\1F", align 1
+
+define i32 @log2_32(i32 %v) !prof !0 {
+; CHECK-LABEL: @log2_32(
+; CHECK: !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 31, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @log2_32_nusw(i32 %v) {
+; CHECK-LABEL: @log2_32_nusw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 31, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr nusw [32 x i8], ptr @log2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+@log2_64.table = internal unnamed_addr constant [64 x i8] c"\00/\0180\1B\02<91)%\1C\10\03=6:#42*\15,& \1D\17\11\0B\04>.7\1A;($\0F5\223\14+\1F\16\0A-\19'\0E!\13\1E\09\18\0D\12\08\0C\07\06\05?", align 1
+
+define i32 @log2_64(i64 noundef %v) {
+; CHECK-LABEL: @log2_64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 63, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[V]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP1]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i64 %v, 1
+  %or = or i64 %shr, %v
+  %shr1 = lshr i64 %or, 2
+  %or2 = or i64 %shr1, %or
+  %shr3 = lshr i64 %or2, 4
+  %or4 = or i64 %shr3, %or2
+  %shr5 = lshr i64 %or4, 8
+  %or6 = or i64 %shr5, %or4
+  %shr7 = lshr i64 %or6, 16
+  %or8 = or i64 %shr7, %or6
+  %shr9 = lshr i64 %or8, 32
+  %or10 = or i64 %shr9, %or8
+  %mul = mul i64 %or10, 285870213051386505
+  %shr11 = lshr i64 %mul, 58
+  %arrayidx = getelementptr inbounds i8, ptr @log2_64.table, i64 %shr11
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+@log2_16.table = internal unnamed_addr constant [16 x i8] c"\00\07\01\0D\08\0A\02\0E\06\0C\09\05\0B\04\03\0F", align 1
+
+define i32 @log2_16(i16 noundef %0) {
+; CHECK-LABEL: @log2_16(
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[TMP0:%.*]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i16 15, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i16 0, i16 [[TMP3]], !prof [[PROF1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i16 [[TMP5]] to i8
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
+; CHECK-NEXT:    ret i32 [[TMP15]]
+;
+  %2 = lshr i16 %0, 1
+  %3 = or i16 %2, %0
+  %4 = lshr i16 %3, 2
+  %5 = or i16 %4, %3
+  %6 = lshr i16 %5, 4
+  %7 = or i16 %6, %5
+  %8 = lshr i16 %7, 8
+  %9 = or i16 %8, %7
+  %10 = mul i16 %9, 3885
+  %11 = lshr i16 %10, 12
+  %12 = zext nneg i16 %11 to i64
+  %13 = getelementptr inbounds nuw i8, ptr @log2_16.table, i64 %12
+  %14 = load i8, ptr %13, align 1
+  %15 = zext i8 %14 to i32
+  ret i32 %15
+}
+
+
+@log2_128.table = internal unnamed_addr constant [128 x i8] c"\00\0D\01\0E\13&\02\0F\1A\14A!'H\03\10\1E\1B9\15.B[\225(c<Ij\04}\11\18\1F\1C,3:\161/NCPU\\#E6g)Rdp=WJs^kw\05~\0C\12%\19@ G\1D8-Z4b;i|\17+20MOTDfQoVr]v\0B$?F7Yah{*LSenqu\0A>X`zKmt\09_yl\08x\07\06\7F", align 1
+
+define i32 @log2_128(i128 noundef %0) {
+; CHECK-LABEL: @log2_128(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP0:%.*]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i128 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i128 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i128 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr i128 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = or i128 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i128 [[TMP7]], 8
+; CHECK-NEXT:    [[TMP9:%.*]] = or i128 [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i128 [[TMP9]], 16
+; CHECK-NEXT:    [[TMP11:%.*]] = or i128 [[TMP10]], [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i128 [[TMP11]], 32
+; CHECK-NEXT:    [[TMP13:%.*]] = or i128 [[TMP12]], [[TMP11]]
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr i128 [[TMP13]], 64
+; CHECK-NEXT:    [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i128 [[TMP15]], 2638024179347461332462726661865453437
+; CHECK-NEXT:    [[TMP17:%.*]] = lshr i128 [[TMP16]], 121
+; CHECK-NEXT:    [[TMP18:%.*]] = trunc nuw nsw i128 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr @log2_128.table, i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i8 [[TMP20]] to i32
+; CHECK-NEXT:    ret i32 [[TMP21]]
+;
+  %2 = lshr i128 %0, 1
+  %3 = or i128 %2, %0
+  %4 = lshr i128 %3, 2
+  %5 = or i128 %4, %3
+  %6 = lshr i128 %5, 4
+  %7 = or i128 %6, %5
+  %8 = lshr i128 %7, 8
+  %9 = or i128 %8, %7
+  %10 = lshr i128 %9, 16
+  %11 = or i128 %10, %9
+  %12 = lshr i128 %11, 32
+  %13 = or i128 %12, %11
+  %14 = lshr i128 %13, 64
+  %15 = or i128 %14, %13
+  %16 = mul i128 %15, 2638024179347461332462726661865453437
+  %17 = lshr i128 %16, 121
+  %18 = trunc nuw nsw i128 %17 to i64
+  %19 = getelementptr inbounds nuw i8, ptr @log2_128.table, i64 %18
+  %20 = load i8, ptr %19, align 1
+  %21 = zext i8 %20 to i32
+  ret i32 %21
+}
+
+!0 = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-negative.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-negative.ll
new file mode 100644
index 0000000000000..4968b01eceee1
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/lower-table-based-log2-negative.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=aggressive-instcombine -mtriple=x86_64 -mattr=+lzcnt -S < %s | FileCheck %s
+
+ at log2.table = internal unnamed_addr constant [32 x i8] c"\05\09\01\0A\0D\15\02\1D\0B\0E\10\12\16\19\03\1E\08\0C\14\1C\0F\11\18\07\13\1B\17\06\1A\05\04\1F", align 1
+
+;; This is a negative test with a wrong table constant.
+
+define i32 @log2_32(i32 %v) {
+; CHECK-LABEL: @log2_32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 16
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329821
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 27
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @log2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+ at log2_2.table = internal unnamed_addr constant [32 x i8] c"\00\09\01\0A\0D\15\02\1D\0B\0E\10\12\16\19\03\1E\08\0C\14\1C\0F\11\18\07\13\1B\17\06\1A\05\04\1F", align 1
+
+;; These are some negative tests with wrong instruction sequences.
+
+define i32 @log2_32_2(i32 %v) {
+; CHECK-LABEL: @log2_32_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 15
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329821
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 27
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 15
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @log2_32_3(i32 %v) {
+; CHECK-LABEL: @log2_32_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 16
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329822
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 27
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 16
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329822
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @log2_32_4(i32 %v) {
+; CHECK-LABEL: @log2_32_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 15
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329821
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 26
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 15
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 26
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2_2.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+;; A test with an external global variable representing the table.
+
+ at table = external global [32 x i8], align 1
+define i32 @log2_32_5(i32 %v) {
+; CHECK-LABEL: @log2_32_5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 15
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329821
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 26
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 15
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 26
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+;; Only constant tables should be recognized as log2 tables.
+ at log2_3.table = global [32 x i8] c"\00\09\01\0A\0D\15\02\1D\0B\0E\10\12\16\19\03\1E\08\0C\14\1C\0F\11\18\07\13\1B\17\06\1A\05\04\1F", align 1
+
+;; These are some negative tests with wrong instruction sequences.
+
+define i32 @log2_32_6(i32 %v) {
+; CHECK-LABEL: @log2_32_6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[V]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 [[OR]], 2
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[SHR1]], [[OR]]
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[OR2]], 4
+; CHECK-NEXT:    [[OR4:%.*]] = or i32 [[SHR3]], [[OR2]]
+; CHECK-NEXT:    [[SHR5:%.*]] = lshr i32 [[OR4]], 8
+; CHECK-NEXT:    [[OR6:%.*]] = or i32 [[SHR5]], [[OR4]]
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[OR6]], 15
+; CHECK-NEXT:    [[OR8:%.*]] = or i32 [[SHR7]], [[OR6]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[OR8]], 130329821
+; CHECK-NEXT:    [[SHR9:%.*]] = lshr i32 [[MUL]], 27
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[SHR9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @log2_3.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+entry:
+  %shr = lshr i32 %v, 1
+  %or = or i32 %shr, %v
+  %shr1 = lshr i32 %or, 2
+  %or2 = or i32 %shr1, %or
+  %shr3 = lshr i32 %or2, 4
+  %or4 = or i32 %shr3, %or2
+  %shr5 = lshr i32 %or4, 8
+  %or6 = or i32 %shr5, %or4
+  %shr7 = lshr i32 %or6, 15
+  %or8 = or i32 %shr7, %or6
+  %mul = mul i32 %or8, 130329821
+  %shr9 = lshr i32 %mul, 27
+  %idxprom = zext nneg i32 %shr9 to i64
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @log2_3.table, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}


        


More information about the llvm-commits mailing list