[llvm] [AggressiveInstCombine] Improve popcount matching if the input has known zero bits (PR #142501)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 2 16:11:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
If the input has known zero bits, InstCombine may have simplified one
of the expected And masks. Teach AggressiveInstCombine to use
MaskedValueIsZero to recover these missing bits.
Fixes #<!-- -->142042.
---
Full diff: https://github.com/llvm/llvm-project/pull/142501.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp (+25-7)
- (modified) llvm/test/Transforms/AggressiveInstCombine/popcount.ll (+48)
``````````diff
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c128687062ade..e8547fd4b3ced 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -328,15 +328,33 @@ static bool tryToRecognizePopCount(Instruction &I) {
m_SpecificInt(Mask33))))) {
Value *Root, *SubOp1;
// Matching "i - ((i >> 1) & 0x55555555...)".
+ const APInt *AndMask;
if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
- m_SpecificInt(Mask55)))) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(
- Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
- ++NumPopCountRecognized;
- return true;
+ m_APInt(AndMask)))) {
+ auto CheckAndMask = [&]() {
+ if (*AndMask == Mask55)
+ return true;
+
+ // Exact match failed, see if any bits are known to be 0 where we
+ // expect a 1 in the mask.
+ if (!AndMask->isSubsetOf(Mask55))
+ return false;
+
+ APInt NeededMask = Mask55 & ~*AndMask;
+ return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
+ NeededMask,
+ SimplifyQuery(I.getDataLayout()));
+ };
+
+ if (CheckAndMask()) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(
+ Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
+ ++NumPopCountRecognized;
+ return true;
+ }
}
}
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 4a89705e17749..2750de2aa7648 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -191,3 +191,51 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
%13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
ret <4 x i32> %13
}
+
+define signext i32 @popcount64_zext(i32 %x) {
+; CHECK-LABEL: @popcount64_zext(
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[ZEXT]])
+; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT: ret i32 [[TMP13]]
+;
+ %zext = zext i32 %x to i64
+ %1 = lshr i64 %zext, 1
+ %2 = and i64 %1, 1431655765
+ %3 = sub nsw i64 %zext, %2
+ %4 = and i64 %3, 3689348814741910323
+ %5 = lshr i64 %3, 2
+ %6 = and i64 %5, 3689348814741910323
+ %7 = add nuw nsw i64 %6, %4
+ %8 = lshr i64 %7, 4
+ %9 = add nuw nsw i64 %8, %7
+ %10 = and i64 %9, 1085102592571150095
+ %11 = mul i64 %10, 72340172838076673
+ %12 = lshr i64 %11, 56
+ %13 = trunc nuw nsw i64 %12 to i32
+ ret i32 %13
+}
+
+define signext i32 @popcount64_mask(i64 %x) {
+; CHECK-LABEL: @popcount64_mask(
+; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[MASK]])
+; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT: ret i32 [[TMP13]]
+;
+ %mask = and i64 %x, -281470681808896 ; 0xffff0000ffff0000
+ %1 = lshr i64 %mask, 1
+ %2 = and i64 %1, 6148820867675914240 ; 0x5555000055550000
+ %3 = sub nsw i64 %mask, %2
+ %4 = and i64 %3, 3689348814741910323
+ %5 = lshr i64 %3, 2
+ %6 = and i64 %5, 3689348814741910323
+ %7 = add nuw nsw i64 %6, %4
+ %8 = lshr i64 %7, 4
+ %9 = add nuw nsw i64 %8, %7
+ %10 = and i64 %9, 1085102592571150095
+ %11 = mul i64 %10, 72340172838076673
+ %12 = lshr i64 %11, 56
+ %13 = trunc nuw nsw i64 %12 to i32
+ ret i32 %13
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/142501
More information about the llvm-commits
mailing list