[llvm] [AggressiveInstCombine] Improve popcount matching if the input has known zero bits (PR #142501)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 09:08:42 PDT 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/142501
>From 19f994ef4cd337764d7680523b4631c4924723bf Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 2 Jun 2025 15:24:01 -0700
Subject: [PATCH 1/4] Pre-commit tests
---
.../AggressiveInstCombine/popcount.ll | 70 +++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 4a89705e17749..aaff5b298ef61 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -191,3 +191,73 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
%13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
ret <4 x i32> %13
}
+
+define signext i32 @popcount64_zext(i32 %x) {
+; CHECK-LABEL: @popcount64_zext(
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ZEXT]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1431655765
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[ZEXT]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 3689348814741910323
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 3689348814741910323
+; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 1085102592571150095
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 72340172838076673
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 56
+; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT: ret i32 [[TMP13]]
+;
+ %zext = zext i32 %x to i64
+ %1 = lshr i64 %zext, 1
+ %2 = and i64 %1, 1431655765
+ %3 = sub nsw i64 %zext, %2
+ %4 = and i64 %3, 3689348814741910323
+ %5 = lshr i64 %3, 2
+ %6 = and i64 %5, 3689348814741910323
+ %7 = add nuw nsw i64 %6, %4
+ %8 = lshr i64 %7, 4
+ %9 = add nuw nsw i64 %8, %7
+ %10 = and i64 %9, 1085102592571150095
+ %11 = mul i64 %10, 72340172838076673
+ %12 = lshr i64 %11, 56
+ %13 = trunc nuw nsw i64 %12 to i32
+ ret i32 %13
+}
+
+define signext i32 @popcount64_mask(i64 %x) {
+; CHECK-LABEL: @popcount64_mask(
+; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[MASK]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 6148820867675914240
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[MASK]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 3689348814741910323
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 3689348814741910323
+; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 1085102592571150095
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 72340172838076673
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 56
+; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT: ret i32 [[TMP13]]
+;
+ %mask = and i64 %x, -281470681808896 ; 0xffff0000ffff0000
+ %1 = lshr i64 %mask, 1
+ %2 = and i64 %1, 6148820867675914240 ; 0x5555000055550000
+ %3 = sub nsw i64 %mask, %2
+ %4 = and i64 %3, 3689348814741910323
+ %5 = lshr i64 %3, 2
+ %6 = and i64 %5, 3689348814741910323
+ %7 = add nuw nsw i64 %6, %4
+ %8 = lshr i64 %7, 4
+ %9 = add nuw nsw i64 %8, %7
+ %10 = and i64 %9, 1085102592571150095
+ %11 = mul i64 %10, 72340172838076673
+ %12 = lshr i64 %11, 56
+ %13 = trunc nuw nsw i64 %12 to i32
+ ret i32 %13
+}
>From 6077fe05f8916fd4628fd09c10736530aac9465a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 2 Jun 2025 15:27:40 -0700
Subject: [PATCH 2/4] [AggressiveInstCombine] Improve popcount matching if the
input has known zero bits.
If the input has known zero bits, InstCombine may have simplified one
of the expected And masks. Teach AggressiveInstCombine to use
MaskedValueIsZero to recover these missing bits.
Fixes #142042.
---
.../AggressiveInstCombine.cpp | 32 +++++++++++++++----
.../AggressiveInstCombine/popcount.ll | 26 ++-------------
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c128687062ade..e8547fd4b3ced 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -328,15 +328,33 @@ static bool tryToRecognizePopCount(Instruction &I) {
m_SpecificInt(Mask33))))) {
Value *Root, *SubOp1;
// Matching "i - ((i >> 1) & 0x55555555...)".
+ const APInt *AndMask;
if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
- m_SpecificInt(Mask55)))) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(
- Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
- ++NumPopCountRecognized;
- return true;
+ m_APInt(AndMask)))) {
+ auto CheckAndMask = [&]() {
+ if (*AndMask == Mask55)
+ return true;
+
+ // Exact match failed, see if any bits are known to be 0 where we
+ // expect a 1 in the mask.
+ if (!AndMask->isSubsetOf(Mask55))
+ return false;
+
+ APInt NeededMask = Mask55 & ~*AndMask;
+ return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
+ NeededMask,
+ SimplifyQuery(I.getDataLayout()));
+ };
+
+ if (CheckAndMask()) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(
+ Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
+ ++NumPopCountRecognized;
+ return true;
+ }
}
}
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index aaff5b298ef61..2750de2aa7648 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -195,18 +195,7 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
define signext i32 @popcount64_zext(i32 %x) {
; CHECK-LABEL: @popcount64_zext(
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ZEXT]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1431655765
-; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[ZEXT]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 3689348814741910323
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 3689348814741910323
-; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 1085102592571150095
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 72340172838076673
-; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 56
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[ZEXT]])
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
; CHECK-NEXT: ret i32 [[TMP13]]
;
@@ -230,18 +219,7 @@ define signext i32 @popcount64_zext(i32 %x) {
define signext i32 @popcount64_mask(i64 %x) {
; CHECK-LABEL: @popcount64_mask(
; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[MASK]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 6148820867675914240
-; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[MASK]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 3689348814741910323
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 3689348814741910323
-; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 1085102592571150095
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 72340172838076673
-; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 56
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[MASK]])
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
; CHECK-NEXT: ret i32 [[TMP13]]
;
>From 972efeccb4c1ba7f8cfbbfa884b6de5bc0795b31 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 3 Jun 2025 09:08:25 -0700
Subject: [PATCH 3/4] Update
llvm/test/Transforms/AggressiveInstCombine/popcount.ll
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/test/Transforms/AggressiveInstCombine/popcount.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 2750de2aa7648..0c3879e6280b6 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -192,7 +192,7 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
ret <4 x i32> %13
}
-define signext i32 @popcount64_zext(i32 %x) {
+define i32 @popcount64_zext(i32 %x) {
; CHECK-LABEL: @popcount64_zext(
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[ZEXT]])
>From 8c7a39091642ab211f9f41ac6cbdffa77ab64360 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 3 Jun 2025 09:08:32 -0700
Subject: [PATCH 4/4] Update
llvm/test/Transforms/AggressiveInstCombine/popcount.ll
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/test/Transforms/AggressiveInstCombine/popcount.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 0c3879e6280b6..f56cab1503531 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -216,7 +216,7 @@ define i32 @popcount64_zext(i32 %x) {
ret i32 %13
}
-define signext i32 @popcount64_mask(i64 %x) {
+define i32 @popcount64_mask(i64 %x) {
; CHECK-LABEL: @popcount64_mask(
; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[MASK]])
More information about the llvm-commits
mailing list