[llvm] [AggressiveInstCombine] POPCNT generation for bit-count pattern (PR #180917)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 03:15:32 PST 2026
https://github.com/rohitaggarwal007 created https://github.com/llvm/llvm-project/pull/180917
The proposal is to enhance LLVM's AggressiveInstCombine pass to recognize the Hacker's Delight bit-counting sequence and replace it with the llvm.ctpop intrinsic, which lowers to the hardware POPCNT instruction where available.
PR #177109 added the first pattern of this popcount fold; this patch handles two further variants of the sequence.
From b1ae92139795d550cc94d425432c90cab1501241 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 21 Jan 2026 12:52:41 +0530
Subject: [PATCH] [AggressiveInstCombine] POPCNT generation for bit-count
pattern
---
.../AggressiveInstCombine.cpp | 90 +++++++++++++++++++
.../AggressiveInstCombine/popcount.ll | 48 ++++++++++
2 files changed, 138 insertions(+)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 3341368208c24..5ca2fa14e1533 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -372,6 +372,95 @@ static bool tryToRecognizePopCount(Instruction &I) {
return false;
}
+// Try to recognize the two functions below as a population count and replace
+// them with the ctpop intrinsic.  See Hacker's Delight (2nd ed.), chapter 5;
+// TargetLowering::expandCTPOP() emits the same sequence.
+//
+// int popcnt(unsigned x) {
+//   x = x - ((x >> 1) & 0x55555555);
+//   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+//   x = (x + (x >> 4)) & 0x0F0F0F0F;
+//   x = x + (x >> 8);
+//   x = x + (x >> 16);
+//   return x & 0x0000003F;
+// }
+//
+// int popcnt(unsigned x) {
+//   x = x - ((x >> 1) & 0x55555555);
+//   x = x + (-3) * ((x >> 2) & 0x33333333); // i.e. x - 3*((x >> 2) & ...)
+//   x = (x + (x >> 4)) & 0x0F0F0F0F;
+//   x = x + (x >> 8);
+//   x = x + (x >> 16);
+//   return x & 0x0000003F;
+// }
+static bool tryToRecognizePopCount2n3(Instruction &I) {
+  if (I.getOpcode() != Instruction::And)
+    return false;
+
+  Type *Ty = I.getType();
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+
+  // The magic constants below (0x55555555, 0x33333333, 0x0F0F0F0F) are
+  // written for a 32-bit word and the reduction stops after the "x >> 16"
+  // step, so this sequence only computes a population count for exactly
+  // 32-bit elements.  Wider types need different masks and extra steps, and
+  // matching them here would be a miscompile.
+  if (Ty->getScalarSizeInBits() != 32)
+    return false;
+
+  // Match bottom-up, starting from "(x + (x >> 16)) & 63".  The operand
+  // added to the shift must be the very value being shifted; m_Deferred
+  // enforces that (binding it with a second m_Value would accept arbitrary
+  // unrelated values).
+  Value *Add16;
+  if (!match(&I, m_And(m_c_Add(m_LShr(m_Value(Add16), m_SpecificInt(16)),
+                               m_Deferred(Add16)),
+                       m_SpecificInt(63))))
+    return false;
+
+  // Match "x = x + (x >> 8)" where x is the 0x0F0F0F0F-masked value.
+  Value *MaskedNibbles;
+  if (!match(Add16,
+             m_c_Add(m_LShr(m_Value(MaskedNibbles), m_SpecificInt(8)),
+                     m_Deferred(MaskedNibbles))))
+    return false;
+
+  // Match "x = (x + (x >> 4)) & 0x0F0F0F0F".
+  Value *Add2Bit;
+  if (!match(MaskedNibbles,
+             m_c_And(m_c_Add(m_LShr(m_Value(Add2Bit), m_SpecificInt(4)),
+                             m_Deferred(Add2Bit)),
+                     m_SpecificInt(0x0F0F0F0F))))
+    return false;
+
+  // Match either form of the 2-bit accumulation step:
+  //   "x = (x & 0x33333333) + ((x >> 2) & 0x33333333)"  (first variant), or
+  //   "x = x + (-3) * ((x >> 2) & 0x33333333)"          (second variant).
+  Value *Sub1;
+  const APInt NegThree(/*numBits=*/32, /*val=*/-3, /*isSigned=*/true);
+  if (!(match(Add2Bit,
+              m_c_Add(m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+                              m_SpecificInt(0x33333333)),
+                      m_c_And(m_Deferred(Sub1), m_SpecificInt(0x33333333)))) ||
+        match(Add2Bit,
+              m_c_Add(m_c_Mul(m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+                                      m_SpecificInt(0x33333333)),
+                              m_SpecificInt(NegThree)),
+                      m_Deferred(Sub1)))))
+    return false;
+
+  // Match the first step, "x = x - ((x >> 1) & 0x55555555)", which also
+  // identifies the root value whose set bits are being counted.
+  Value *Root;
+  if (!match(Sub1, m_Sub(m_Value(Root),
+                         m_c_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
+                                 m_SpecificInt(0x55555555)))))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+  IRBuilder<> Builder(&I);
+  I.replaceAllUsesWith(
+      Builder.CreateIntrinsic(Intrinsic::ctpop, Ty, {Root}));
+  ++NumPopCountRecognized;
+  return true;
+}
+
/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversable
/// as the fptosi.sat is more defined than the input - all values produce a
@@ -1826,6 +1915,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToRecognizePopCount2n3(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f56cab1503531..2c9fde6608984 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -239,3 +239,51 @@ define i32 @popcount64_mask(i64 %x) {
%13 = trunc nuw nsw i64 %12 to i32
ret i32 %13
}
+
+; Variant 1 of the Hacker's Delight popcount: the 2-bit accumulation step
+; masks both halves with 0x33333333 separately.  Must fold to a single
+; llvm.ctpop.i32 call.
+define dso_local noundef range(i32 0, 64) i32 @popcnt2(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt2(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = lshr i32 %0, 1
+ %3 = and i32 %2, 1431655765
+ %4 = sub i32 %0, %3
+ %5 = and i32 %4, 858993459
+ %6 = lshr i32 %4, 2
+ %7 = and i32 %6, 858993459
+ %8 = add nuw nsw i32 %7, %5
+ %9 = lshr i32 %8, 4
+ %10 = add nuw nsw i32 %9, %8
+ %11 = and i32 %10, 252645135
+ %12 = lshr i32 %11, 8
+ %13 = add nuw nsw i32 %12, %11
+ %14 = lshr i32 %13, 16
+ %15 = add nuw nsw i32 %14, %13
+ %16 = and i32 %15, 63
+ ret i32 %16
+}
+
+; Variant 2: the 2-bit accumulation step is expressed as
+; x + (-3) * ((x >> 2) & 0x33333333).  Must also fold to llvm.ctpop.i32.
+define dso_local noundef range(i32 0, 64) i32 @popcnt3(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt3(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[TMP16]]
+;
+ %2 = lshr i32 %0, 1
+ %3 = and i32 %2, 1431655765
+ %4 = sub i32 %0, %3
+ %5 = lshr i32 %4, 2
+ %6 = and i32 %5, 858993459
+ %7 = mul i32 %6, -3
+ %8 = add i32 %7, %4
+ %9 = lshr i32 %8, 4
+ %10 = add i32 %9, %8
+ %11 = and i32 %10, 252645135
+ %12 = lshr i32 %11, 8
+ %13 = add nuw nsw i32 %12, %11
+ %14 = lshr i32 %13, 16
+ %15 = add nuw nsw i32 %14, %13
+ %16 = and i32 %15, 63
+ ret i32 %16
+}
More information about the llvm-commits
mailing list