[llvm] [AggressiveInstCombine] POPCNT generation for bit-count pattern (PR #177109)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 00:16:04 PDT 2026
https://github.com/rohitaggarwal007 updated https://github.com/llvm/llvm-project/pull/177109
>From cef5ff889c938ff56464f8064d0a74d4b624b368 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 21 Jan 2026 12:52:41 +0530
Subject: [PATCH 1/9] [AggressiveInstCombine] POPCNT generation for bit-count
pattern
---
.../AggressiveInstCombine.cpp | 165 ++++++++++++++++++
.../AggressiveInstCombine/popcount.ll | 79 +++++++++
2 files changed, 244 insertions(+)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 3341368208c24..94bf8348b4f19 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -372,6 +372,169 @@ static bool tryToRecognizePopCount(Instruction &I) {
return false;
}
+// Try to recognize below function as popcount intrinsic.
+// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcount(unsigned int i) {
+// uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
+// uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
+// uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F);
+// uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);
+// return (uWord & 0x0000FFFF) + (uWord>>16);
+// }
+static bool tryToRecognizePopCount1(Instruction &I) {
+ if (I.getOpcode() != Instruction::Add)
+ return false;
+
+ Type *Ty = I.getType();
+ if (!Ty->isIntOrIntVectorTy())
+ return false;
+
+ unsigned Len = Ty->getScalarSizeInBits();
+ if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+ return false;
+
+ APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
+ APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *LShrOp0;
+ // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
+ if ((match(Op1, m_LShr(m_Value(LShrOp0), m_SpecificInt(16)))) &&
+ match(Op0, m_And(m_Deferred(LShrOp0), m_SpecificInt(31)))) {
+ Value *ShiftOp0;
+ // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
+ if (match(LShrOp0,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+ m_SpecificInt(983055)),
+ m_And(m_Deferred(ShiftOp0), m_SpecificInt(983055))))) {
+ Value *ShiftOp1;
+ // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
+ if (match(
+ ShiftOp0,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+ m_SpecificInt(117901063)),
+ m_And(m_Deferred(ShiftOp1), m_SpecificInt(117901063))))) {
+ Value *ShiftOp2;
+ // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+ if (match(
+ ShiftOp1,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
+ m_SpecificInt(Mask33)),
+ m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
+ Value *ShiftOp3;
+ // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+ // 0x55555555)".
+ if (match(ShiftOp2,
+ m_c_Add(
+ m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
+ m_SpecificInt(Mask55)),
+ m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(Builder.CreateIntrinsic(
+ Intrinsic::ctpop, I.getType(), {ShiftOp3}));
+ ++NumPopCountRecognized;
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// Try to recognize below function as popcount intrinsic.
+// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcnt(unsigned x) {
+// x = x - ((x >> 1) & 0x55555555);
+// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+// x = (x + (x >> 4)) & 0x0F0F0F0F;
+// x = x + (x >> 8);
+// x = x + (x >> 16);
+// return x & 0x0000003F;
+// }
+
+// int popcnt(unsigned x) {
+// x = x - ((x >> 1) & 0x55555555);
+// x = x - 3*((x >> 2) & 0x33333333);
+// x = (x + (x >> 4)) & 0x0F0F0F0F;
+// x = x + (x >> 8);
+// x = x + (x >> 16);
+// return x & 0x0000003F;
+// }
+
+static bool tryToRecognizePopCount2n3(Instruction &I) {
+ if (I.getOpcode() != Instruction::And)
+ return false;
+
+ Type *Ty = I.getType();
+ if (!Ty->isIntOrIntVectorTy())
+ return false;
+
+ unsigned Len = Ty->getScalarSizeInBits();
+ if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+ return false;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *LShrOp0;
+ Value *AddOp1;
+ // Matching "x & 0x0000003F".
+ if ((match(Op0, m_Add(m_Value(LShrOp0), m_Value(AddOp1)))) &&
+ match(Op1, m_SpecificInt(63))) {
+ Value *LShr1;
+ Value *And1;
+ // Matching "x = x + (x >> 16)".
+ if (match(LShrOp0, m_LShr(m_Add(m_Value(LShr1), m_Value(And1)),
+ m_SpecificInt(16)))) {
+ Value *Add2;
+ // Matching " x = x + (x >> 8)".
+ if (match(LShr1, m_LShr(m_Deferred(And1), m_SpecificInt(8))) &&
+ match(And1, m_c_And(m_Value(Add2), m_SpecificInt(252645135)))) {
+ Value *Add3;
+ // Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
+ if (match(Add2, m_c_Add(m_LShr(m_Value(Add3), m_SpecificInt(4)),
+ m_Deferred(Add3)))) {
+ Value *Sub1;
+ llvm::APInt NegThree(/*BitWidth=*/32, /*Value=*/-3,
+ /*isSigned=*/true);
+ // x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
+ if (match(Add3,
+ m_c_Add(
+ m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+ m_SpecificInt(858993459)),
+ m_c_And(m_Deferred(Sub1), m_SpecificInt(858993459)))) ||
+ // Matching "x = x - 3*((x >> 2) & 0x33333333)".
+ match(Add3,
+ m_Add(m_Mul(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+ m_SpecificInt(858993459)),
+ m_SpecificInt(NegThree)),
+ m_Deferred(Sub1)))) {
+ Value *Root;
+ if (match(Sub1,
+ m_Sub(m_Value(Root),
+ m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
+ m_SpecificInt(1431655765))))) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(Builder.CreateIntrinsic(
+ Intrinsic::ctpop, I.getType(), {Root}));
+ ++NumPopCountRecognized;
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversable
/// as the fptosi.sat is more defined than the input - all values produce a
@@ -1826,6 +1989,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToRecognizePopCount1(I);
+ MadeChange |= tryToRecognizePopCount2n3(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f56cab1503531..9b4eea4513de9 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -239,3 +239,82 @@ define i32 @popcount64_mask(i64 %x) {
%13 = trunc nuw nsw i64 %12 to i32
ret i32 %13
}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef range(i32 0, 59) i32 @popcnt1(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %and = and i32 %uWord, 1431655765
+ %shr = lshr i32 %uWord, 1
+ %and1 = and i32 %shr, 1431655765
+ %add = add nuw i32 %and1, %and
+ %and2 = and i32 %add, 858993459
+ %shr3 = lshr i32 %add, 2
+ %and4 = and i32 %shr3, 858993459
+ %add5 = add nuw nsw i32 %and4, %and2
+ %and6 = and i32 %add5, 117901063
+ %shr7 = lshr i32 %add5, 4
+ %and8 = and i32 %shr7, 117901063
+ %add9 = add nuw nsw i32 %and8, %and6
+ %and10 = and i32 %add9, 983055
+ %shr11 = lshr i32 %add9, 8
+ %and12 = and i32 %shr11, 983055
+ %add13 = add nuw nsw i32 %and12, %and10
+ %and14 = and i32 %add13, 31
+ %shr15 = lshr i32 %add13, 16
+ %add16 = add nuw nsw i32 %and14, %shr15
+ ret i32 %add16
+}
+
+define dso_local noundef range(i32 0, 64) i32 @popcnt2(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt2(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = lshr i32 %0, 1
+ %3 = and i32 %2, 1431655765
+ %4 = sub i32 %0, %3
+ %5 = and i32 %4, 858993459
+ %6 = lshr i32 %4, 2
+ %7 = and i32 %6, 858993459
+ %8 = add nuw nsw i32 %7, %5
+ %9 = lshr i32 %8, 4
+ %10 = add nuw nsw i32 %9, %8
+ %11 = and i32 %10, 252645135
+ %12 = lshr i32 %11, 8
+ %13 = add nuw nsw i32 %12, %11
+ %14 = lshr i32 %13, 16
+ %15 = add nuw nsw i32 %14, %13
+ %16 = and i32 %15, 63
+ ret i32 %16
+}
+
+define dso_local noundef range(i32 0, 64) i32 @popcnt3(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt3(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[TMP16]]
+;
+ %2 = lshr i32 %0, 1
+ %3 = and i32 %2, 1431655765
+ %4 = sub i32 %0, %3
+ %5 = lshr i32 %4, 2
+ %6 = and i32 %5, 858993459
+ %7 = mul i32 %6, -3
+ %8 = add i32 %7, %4
+ %9 = lshr i32 %8, 4
+ %10 = add i32 %9, %8
+ %11 = and i32 %10, 252645135
+ %12 = lshr i32 %11, 8
+ %13 = add nuw nsw i32 %12, %11
+ %14 = lshr i32 %13, 16
+ %15 = add nuw nsw i32 %14, %13
+ %16 = and i32 %15, 63
+ ret i32 %16
+}
>From 507c16f86438eb7931f36307c806228d83ae8010 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 11 Feb 2026 16:34:51 +0530
Subject: [PATCH 2/9] Split the PR into two PRs
---
.../AggressiveInstCombine.cpp | 90 -------------------
.../AggressiveInstCombine/popcount.ll | 47 ----------
2 files changed, 137 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 94bf8348b4f19..f8804ba432e78 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -446,95 +446,6 @@ static bool tryToRecognizePopCount1(Instruction &I) {
return false;
}
-// Try to recognize below function as popcount intrinsic.
-// https://doc.lagout.org/security/Hackers%20Delight.pdf
-// Also used in TargetLowering::expandCTPOP().
-//
-// int popcnt(unsigned x) {
-// x = x - ((x >> 1) & 0x55555555);
-// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-// x = (x + (x >> 4)) & 0x0F0F0F0F;
-// x = x + (x >> 8);
-// x = x + (x >> 16);
-// return x & 0x0000003F;
-// }
-
-// int popcnt(unsigned x) {
-// x = x - ((x >> 1) & 0x55555555);
-// x = x - 3*((x >> 2) & 0x33333333);
-// x = (x + (x >> 4)) & 0x0F0F0F0F;
-// x = x + (x >> 8);
-// x = x + (x >> 16);
-// return x & 0x0000003F;
-// }
-
-static bool tryToRecognizePopCount2n3(Instruction &I) {
- if (I.getOpcode() != Instruction::And)
- return false;
-
- Type *Ty = I.getType();
- if (!Ty->isIntOrIntVectorTy())
- return false;
-
- unsigned Len = Ty->getScalarSizeInBits();
- if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
- return false;
-
- Value *Op0 = I.getOperand(0);
- Value *Op1 = I.getOperand(1);
- Value *LShrOp0;
- Value *AddOp1;
- // Matching "x & 0x0000003F".
- if ((match(Op0, m_Add(m_Value(LShrOp0), m_Value(AddOp1)))) &&
- match(Op1, m_SpecificInt(63))) {
- Value *LShr1;
- Value *And1;
- // Matching "x = x + (x >> 16)".
- if (match(LShrOp0, m_LShr(m_Add(m_Value(LShr1), m_Value(And1)),
- m_SpecificInt(16)))) {
- Value *Add2;
- // Matching " x = x + (x >> 8)".
- if (match(LShr1, m_LShr(m_Deferred(And1), m_SpecificInt(8))) &&
- match(And1, m_c_And(m_Value(Add2), m_SpecificInt(252645135)))) {
- Value *Add3;
- // Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
- if (match(Add2, m_c_Add(m_LShr(m_Value(Add3), m_SpecificInt(4)),
- m_Deferred(Add3)))) {
- Value *Sub1;
- llvm::APInt NegThree(/*BitWidth=*/32, /*Value=*/-3,
- /*isSigned=*/true);
- // x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
- if (match(Add3,
- m_c_Add(
- m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
- m_SpecificInt(858993459)),
- m_c_And(m_Deferred(Sub1), m_SpecificInt(858993459)))) ||
- // Matching "x = x - 3*((x >> 2) & 0x33333333)".
- match(Add3,
- m_Add(m_Mul(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
- m_SpecificInt(858993459)),
- m_SpecificInt(NegThree)),
- m_Deferred(Sub1)))) {
- Value *Root;
- if (match(Sub1,
- m_Sub(m_Value(Root),
- m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
- m_SpecificInt(1431655765))))) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(Builder.CreateIntrinsic(
- Intrinsic::ctpop, I.getType(), {Root}));
- ++NumPopCountRecognized;
- return true;
- }
- }
- }
- }
- }
- }
- return false;
-}
-
/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversable
/// as the fptosi.sat is more defined than the input - all values produce a
@@ -1990,7 +1901,6 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToRecognizePopCount1(I);
- MadeChange |= tryToRecognizePopCount2n3(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 9b4eea4513de9..3f5269a41a7f8 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -271,50 +271,3 @@ entry:
ret i32 %add16
}
-define dso_local noundef range(i32 0, 64) i32 @popcnt2(i32 noundef %0) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt2(
-; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
-; CHECK-NEXT: ret i32 [[TMP2]]
-;
- %2 = lshr i32 %0, 1
- %3 = and i32 %2, 1431655765
- %4 = sub i32 %0, %3
- %5 = and i32 %4, 858993459
- %6 = lshr i32 %4, 2
- %7 = and i32 %6, 858993459
- %8 = add nuw nsw i32 %7, %5
- %9 = lshr i32 %8, 4
- %10 = add nuw nsw i32 %9, %8
- %11 = and i32 %10, 252645135
- %12 = lshr i32 %11, 8
- %13 = add nuw nsw i32 %12, %11
- %14 = lshr i32 %13, 16
- %15 = add nuw nsw i32 %14, %13
- %16 = and i32 %15, 63
- ret i32 %16
-}
-
-define dso_local noundef range(i32 0, 64) i32 @popcnt3(i32 noundef %0) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt3(
-; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
-; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
-; CHECK-NEXT: ret i32 [[TMP16]]
-;
- %2 = lshr i32 %0, 1
- %3 = and i32 %2, 1431655765
- %4 = sub i32 %0, %3
- %5 = lshr i32 %4, 2
- %6 = and i32 %5, 858993459
- %7 = mul i32 %6, -3
- %8 = add i32 %7, %4
- %9 = lshr i32 %8, 4
- %10 = add i32 %9, %8
- %11 = and i32 %10, 252645135
- %12 = lshr i32 %11, 8
- %13 = add nuw nsw i32 %12, %11
- %14 = lshr i32 %13, 16
- %15 = add nuw nsw i32 %14, %13
- %16 = and i32 %15, 63
- ret i32 %16
-}
>From 669e15134fbd807009adbdc6c95936170ed5301d Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 25 Feb 2026 11:08:45 +0530
Subject: [PATCH 3/9] Update the logic to support generation of i16, i32 and
i64 popcnt intrinsic.
---
.../AggressiveInstCombine.cpp | 155 ++++++++---
.../AggressiveInstCombine/popcount.ll | 246 +++++++++++++++++-
2 files changed, 351 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index f8804ba432e78..c43f397d8b562 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -376,13 +376,21 @@ static bool tryToRecognizePopCount(Instruction &I) {
// https://doc.lagout.org/security/Hackers%20Delight.pdf
// Also used in TargetLowering::expandCTPOP().
//
-// int popcount(unsigned int i) {
+// int popcount32(unsigned int uWord) {
// uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
// uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
// uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F);
// uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);
// return (uWord & 0x0000FFFF) + (uWord>>16);
// }
+// int popcount64(unsigned long uWord) {
+// uWord = (uWord & 0x5555555555555555) + ((uWord>>1) & 0x5555555555555555);
+// uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
+// uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
+// uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
+// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// return (uWord & 0x00000000FFFFFFFF) + ((uWord>>32) & 0x00000000FFFFFFFF);
+// }
static bool tryToRecognizePopCount1(Instruction &I) {
if (I.getOpcode() != Instruction::Add)
return false;
@@ -392,54 +400,115 @@ static bool tryToRecognizePopCount1(Instruction &I) {
return false;
unsigned Len = Ty->getScalarSizeInBits();
- if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+ if (!(Len <= 64 && Len > 8 && Len % 8 == 0))
return false;
APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
- Value *Op0 = I.getOperand(0);
- Value *Op1 = I.getOperand(1);
+ APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
+ APInt Mask00FF;
+ if (Len <= 16) {
+ Mask00FF = APInt(16, 0x00FF);
+ } else {
+ Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
+ }
+ APInt Mask0000FFFF;
+ if (Len <= 32) {
+ Mask0000FFFF = APInt(32, 0x0000FFFF);
+ } else {
+ Mask0000FFFF = APInt::getSplat(Len, APInt(32, 0x0000FFFF));
+ }
+
+ APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
+ // Matching "(uWord & 0x00000000FFFFFFFF) + (uWord>>32)".
+ // OR
+ // Matching "(uWord & 0x00000000FFFFFFFF) + ((uWord>>32) &
+ // 0x00000000FFFFFFFF)".
+ Value *ShiftOp;
+ Value *Start = &I;
+ bool is64 = false;
+ if (match(Start,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
+ m_SpecificInt(Mask64)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64)))) ||
+ match(Start,
+ m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
+ Start = ShiftOp;
+ is64 = true;
+ }
Value *LShrOp0;
// Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
- if ((match(Op1, m_LShr(m_Value(LShrOp0), m_SpecificInt(16)))) &&
- match(Op0, m_And(m_Deferred(LShrOp0), m_SpecificInt(31)))) {
- Value *ShiftOp0;
- // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
- if (match(LShrOp0,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
- m_SpecificInt(983055)),
- m_And(m_Deferred(ShiftOp0), m_SpecificInt(983055))))) {
- Value *ShiftOp1;
- // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
- if (match(
- ShiftOp0,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
- m_SpecificInt(117901063)),
- m_And(m_Deferred(ShiftOp1), m_SpecificInt(117901063))))) {
- Value *ShiftOp2;
- // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
- if (match(
- ShiftOp1,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
- m_SpecificInt(Mask33)),
- m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
- Value *ShiftOp3;
- // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
- // 0x55555555)".
- if (match(ShiftOp2,
- m_c_Add(
- m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
- m_SpecificInt(Mask55)),
- m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(Builder.CreateIntrinsic(
- Intrinsic::ctpop, I.getType(), {ShiftOp3}));
- ++NumPopCountRecognized;
- return true;
- }
- }
- }
+ // Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
+ bool test16 = match(
+ Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
+ m_SpecificInt(Mask0000FFFF)),
+ m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
+
+ bool is32 = false;
+ if ((is64 && test16) ||
+ (!is64 && Len == 32 &&
+ (test16 ||
+ match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
+ m_And(m_Deferred(LShrOp0),
+ m_SpecificInt(Mask0000FFFF))))))) {
+ Start = LShrOp0;
+
+ is32 = true;
+ }
+ Value *ShiftOp0;
+ // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
+ // OR
+ // Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8) ;".
+ bool test8 = match(
+ Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+ m_SpecificInt(Mask00FF)),
+ m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
+
+ bool is16 = false;
+ if ((is32 && test8) ||
+ (!is32 && Len == 16 &&
+ (test8 ||
+ match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+ m_And(m_Deferred(ShiftOp0),
+ m_SpecificInt(Mask00FF))))))) {
+ Start = ShiftOp0;
+ is16 = true;
+ }
+
+ Value *ShiftOp1;
+ // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
+ bool test4 =
+ match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+ m_SpecificInt(Mask0F)),
+ m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))));
+
+ bool is8 = false;
+ if ((is16 && test4)) {
+ Start = ShiftOp1;
+ is8 = true;
+ }
+
+ Value *ShiftOp2;
+ // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+ if (is8 &&
+ match(Start,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
+ m_SpecificInt(Mask33)),
+ m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
+ Value *ShiftOp3;
+ // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+ // 0x55555555)".
+ if (match(ShiftOp2,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
+ m_SpecificInt(Mask55)),
+ m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(
+ Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp3}));
+ ++NumPopCountRecognized;
+ return true;
}
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 3f5269a41a7f8..f161b47428329 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -241,8 +241,8 @@ define i32 @popcount64_mask(i64 %x) {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1(
+define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(
; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
@@ -257,17 +257,249 @@ entry:
%shr3 = lshr i32 %add, 2
%and4 = and i32 %shr3, 858993459
%add5 = add nuw nsw i32 %and4, %and2
- %and6 = and i32 %add5, 117901063
+ %and6 = and i32 %add5, 252645135
%shr7 = lshr i32 %add5, 4
- %and8 = and i32 %shr7, 117901063
+ %and8 = and i32 %shr7, 252645135
%add9 = add nuw nsw i32 %and8, %and6
- %and10 = and i32 %add9, 983055
+ %and10 = and i32 %add9, 16711935
%shr11 = lshr i32 %add9, 8
- %and12 = and i32 %shr11, 983055
+ %and12 = and i32 %shr11, 16711935
%add13 = add nuw nsw i32 %and12, %and10
- %and14 = and i32 %add13, 31
+ %and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
%add16 = add nuw nsw i32 %and14, %shr15
ret i32 %add16
}
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %and = and i32 %uWord, 1431655765
+ %shr = lshr i32 %uWord, 1
+ %and1 = and i32 %shr, 1431655765
+ %add = add nuw i32 %and1, %and
+ %and2 = and i32 %add, 858993459
+ %shr3 = lshr i32 %add, 2
+ %and4 = and i32 %shr3, 858993459
+ %add5 = add nuw nsw i32 %and4, %and2
+ %and6 = and i32 %add5, 252645135
+ %shr7 = lshr i32 %add5, 4
+ %and8 = and i32 %shr7, 252645135
+ %add9 = add nuw nsw i32 %and8, %and6
+ %and10 = and i32 %add9, 16711935
+ %shr11 = lshr i32 %add9, 8
+ %and12 = and i32 %shr11, 16711935
+ %add13 = add nuw nsw i32 %and12, %and10
+ %and14 = and i32 %add13, 65535
+ %shr15 = lshr i32 %add13, 16
+ %and16 = and i32 %shr15, 65535
+ %add17 = add nuw nsw i32 %and14, %and16
+ ret i32 %add17
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef i64 @popcnt1_64(i64 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef i64 @popcnt1_64(
+; CHECK-SAME: i64 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD]])
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
+ %shr = lshr i64 %uWord, 1
+ %and1 = and i64 %shr, 6148914691236517205
+ %add = add nuw i64 %and1, %and
+ %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+ %shr3 = lshr i64 %add, 2
+ %and4 = and i64 %shr3, 3689348814741910323
+ %add5 = add nuw nsw i64 %and4, %and2
+ %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
+ %shr7 = lshr i64 %add5, 4
+ %and8 = and i64 %shr7, 1085102592571150095
+ %add9 = add nuw nsw i64 %and8, %and6
+ %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
+ %shr11 = lshr i64 %add9, 8
+ %and12 = and i64 %shr11, 71777214294589695
+ %add13 = add nuw nsw i64 %and12, %and10
+ %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+ %shr15 = lshr i64 %add13, 16
+ %and16 = and i64 %shr15, 281470681808895
+ %add17 = add nuw nsw i64 %and16, %and14
+ %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+ %shr19 = lshr i64 %add17, 32
+ %add20 = add nuw nsw i64 %and18, %shr19
+ ret i64 %add20
+}
+
+; Test 16-bit popcount pattern (Hacker's Delight)
+define i16 @popcnt1_16(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16(
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.ctpop.i16(i16 [[UWORD:%.*]])
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %and = and i16 %uWord, 21845 ; 0x5555
+ %shr = lshr i16 %uWord, 1
+ %and1 = and i16 %shr, 21845
+ %add = add nuw i16 %and1, %and
+ %and2 = and i16 %add, 13107 ; 0x3333
+ %shr3 = lshr i16 %add, 2
+ %and4 = and i16 %shr3, 13107
+ %add5 = add nuw nsw i16 %and4, %and2
+ %and6 = and i16 %add5, 3855 ; 0x0F0F
+ %shr7 = lshr i16 %add5, 4
+ %and8 = and i16 %shr7, 3855
+ %add9 = add nuw nsw i16 %and8, %and6
+ %and10 = and i16 %add9, 255 ; 0x00FF
+ %shr11 = lshr i16 %add9, 8
+ %add13 = add nuw nsw i16 %shr11, %and10
+ ret i16 %add13
+}
+
+; Test 16-bit popcount pattern variant2 (with extra mask on final shift)
+define i16 @popcnt1_16_variant2(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_variant2(
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.ctpop.i16(i16 [[UWORD:%.*]])
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %and = and i16 %uWord, 21845 ; 0x5555
+ %shr = lshr i16 %uWord, 1
+ %and1 = and i16 %shr, 21845
+ %add = add nuw i16 %and1, %and
+ %and2 = and i16 %add, 13107 ; 0x3333
+ %shr3 = lshr i16 %add, 2
+ %and4 = and i16 %shr3, 13107
+ %add5 = add nuw nsw i16 %and4, %and2
+ %and6 = and i16 %add5, 3855 ; 0x0F0F
+ %shr7 = lshr i16 %add5, 4
+ %and8 = and i16 %shr7, 3855
+ %add9 = add nuw nsw i16 %and8, %and6
+ %and10 = and i16 %add9, 255 ; 0x00FF
+ %shr11 = lshr i16 %add9, 8
+ %and12 = and i16 %shr11, 255
+ %add13 = add nuw nsw i16 %and10, %and12
+ ret i16 %add13
+}
+
+; Vector variant: 16-bit popcount pattern (Hacker's Delight)
+define <8 x i16> @popcnt1_16vec(<8 x i16> %uWord) {
+; CHECK-LABEL: @popcnt1_16vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[UWORD:%.*]])
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %and = and <8 x i16> %uWord, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
+ %shr = lshr <8 x i16> %uWord, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %and1 = and <8 x i16> %shr, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
+ %add = add nuw <8 x i16> %and1, %and
+ %and2 = and <8 x i16> %add, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
+ %shr3 = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %and4 = and <8 x i16> %shr3, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
+ %add5 = add nuw nsw <8 x i16> %and4, %and2
+ %and6 = and <8 x i16> %add5, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
+ %shr7 = lshr <8 x i16> %add5, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+ %and8 = and <8 x i16> %shr7, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
+ %add9 = add nuw nsw <8 x i16> %and8, %and6
+ %and10 = and <8 x i16> %add9, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %shr11 = lshr <8 x i16> %add9, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %add13 = add nuw nsw <8 x i16> %shr11, %and10
+ ret <8 x i16> %add13
+}
+
+; Vector variant: 32-bit popcount pattern (Hacker's Delight)
+define <4 x i32> @popcnt1_32vec(<4 x i32> %uWord) {
+; CHECK-LABEL: @popcnt1_32vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[UWORD:%.*]])
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %and = and <4 x i32> %uWord, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
+ %shr = lshr <4 x i32> %uWord, <i32 1, i32 1, i32 1, i32 1>
+ %and1 = and <4 x i32> %shr, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
+ %add = add nuw <4 x i32> %and1, %and
+ %and2 = and <4 x i32> %add, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
+ %shr3 = lshr <4 x i32> %add, <i32 2, i32 2, i32 2, i32 2>
+ %and4 = and <4 x i32> %shr3, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
+ %add5 = add nuw nsw <4 x i32> %and4, %and2
+ %and6 = and <4 x i32> %add5, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
+ %shr7 = lshr <4 x i32> %add5, <i32 4, i32 4, i32 4, i32 4>
+ %and8 = and <4 x i32> %shr7, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
+ %add9 = add nuw nsw <4 x i32> %and8, %and6
+ %and10 = and <4 x i32> %add9, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
+ %shr11 = lshr <4 x i32> %add9, <i32 8, i32 8, i32 8, i32 8>
+ %and12 = and <4 x i32> %shr11, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
+ %add13 = add nuw nsw <4 x i32> %and12, %and10
+ %and14 = and <4 x i32> %add13, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %shr15 = lshr <4 x i32> %add13, <i32 16, i32 16, i32 16, i32 16>
+ %add16 = add nuw nsw <4 x i32> %and14, %shr15
+ ret <4 x i32> %add16
+}
+
+; Test 64-bit popcount pattern variant2 (with extra mask on final shift)
+define i64 @popcnt1_64_variant2(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_variant2(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD:%.*]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
+ %shr = lshr i64 %uWord, 1
+ %and1 = and i64 %shr, 6148914691236517205
+ %add = add nuw i64 %and1, %and
+ %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+ %shr3 = lshr i64 %add, 2
+ %and4 = and i64 %shr3, 3689348814741910323
+ %add5 = add nuw nsw i64 %and4, %and2
+ %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
+ %shr7 = lshr i64 %add5, 4
+ %and8 = and i64 %shr7, 1085102592571150095
+ %add9 = add nuw nsw i64 %and8, %and6
+ %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
+ %shr11 = lshr i64 %add9, 8
+ %and12 = and i64 %shr11, 71777214294589695
+ %add13 = add nuw nsw i64 %and12, %and10
+ %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+ %shr15 = lshr i64 %add13, 16
+ %and16 = and i64 %shr15, 281470681808895
+ %add17 = add nuw nsw i64 %and16, %and14
+ %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+ %shr19 = lshr i64 %add17, 32
+ %and20 = and i64 %shr19, 4294967295
+ %add21 = add nuw nsw i64 %and18, %and20
+ ret i64 %add21
+}
+
+; Vector variant: 64-bit popcount pattern (Hacker's Delight)
+define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
+; CHECK-LABEL: @popcnt1_64vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[UWORD:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %and = and <2 x i64> %uWord, <i64 6148914691236517205, i64 6148914691236517205>
+ %shr = lshr <2 x i64> %uWord, <i64 1, i64 1>
+ %and1 = and <2 x i64> %shr, <i64 6148914691236517205, i64 6148914691236517205>
+ %add = add nuw <2 x i64> %and1, %and
+ %and2 = and <2 x i64> %add, <i64 3689348814741910323, i64 3689348814741910323>
+ %shr3 = lshr <2 x i64> %add, <i64 2, i64 2>
+ %and4 = and <2 x i64> %shr3, <i64 3689348814741910323, i64 3689348814741910323>
+ %add5 = add nuw nsw <2 x i64> %and4, %and2
+ %and6 = and <2 x i64> %add5, <i64 1085102592571150095, i64 1085102592571150095>
+ %shr7 = lshr <2 x i64> %add5, <i64 4, i64 4>
+ %and8 = and <2 x i64> %shr7, <i64 1085102592571150095, i64 1085102592571150095>
+ %add9 = add nuw nsw <2 x i64> %and8, %and6
+ %and10 = and <2 x i64> %add9, <i64 71777214294589695, i64 71777214294589695>
+ %shr11 = lshr <2 x i64> %add9, <i64 8, i64 8>
+ %and12 = and <2 x i64> %shr11, <i64 71777214294589695, i64 71777214294589695>
+ %add13 = add nuw nsw <2 x i64> %and12, %and10
+ %and14 = and <2 x i64> %add13, <i64 281470681808895, i64 281470681808895>
+ %shr15 = lshr <2 x i64> %add13, <i64 16, i64 16>
+ %and16 = and <2 x i64> %shr15, <i64 281470681808895, i64 281470681808895>
+ %add17 = add nuw nsw <2 x i64> %and16, %and14
+ %and18 = and <2 x i64> %add17, <i64 4294967295, i64 4294967295>
+ %shr19 = lshr <2 x i64> %add17, <i64 32, i64 32>
+ %add20 = add nuw nsw <2 x i64> %and18, %shr19
+ ret <2 x i64> %add20
+}
>From 796bcf76230e397aec6c70d6ad0ad77e8840dee5 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 12 Mar 2026 22:10:05 +0530
Subject: [PATCH 4/9] Fix review comments
---
.../AggressiveInstCombine.cpp | 55 ++++++++-----------
1 file changed, 23 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c43f397d8b562..948e09674293b 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -407,7 +407,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
APInt Mask00FF;
- if (Len <= 16) {
+ if (Len == 16) {
Mask00FF = APInt(16, 0x00FF);
} else {
Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
@@ -426,7 +426,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
// 0x00000000FFFFFFFF)".
Value *ShiftOp;
Value *Start = &I;
- bool is64 = false;
+ bool Is64 = false;
if (match(Start,
m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
m_SpecificInt(Mask64)),
@@ -435,64 +435,55 @@ static bool tryToRecognizePopCount1(Instruction &I) {
m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
Start = ShiftOp;
- is64 = true;
+ Is64 = true;
}
Value *LShrOp0;
// Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
// Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
- bool test16 = match(
+ bool Test16 = match(
Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
m_SpecificInt(Mask0000FFFF)),
m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
- bool is32 = false;
- if ((is64 && test16) ||
- (!is64 && Len == 32 &&
- (test16 ||
+ bool Is32 = false;
+ if ((Is64 && Test16) ||
+ (!Is64 && Len == 32 &&
+ (Test16 ||
match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
m_And(m_Deferred(LShrOp0),
m_SpecificInt(Mask0000FFFF))))))) {
Start = LShrOp0;
-
- is32 = true;
+ Is32 = true;
}
Value *ShiftOp0;
// Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
// OR
// Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8) ;".
- bool test8 = match(
+ bool Test8 = match(
Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
m_SpecificInt(Mask00FF)),
m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
-
- bool is16 = false;
- if ((is32 && test8) ||
- (!is32 && Len == 16 &&
- (test8 ||
- match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
- m_And(m_Deferred(ShiftOp0),
- m_SpecificInt(Mask00FF))))))) {
- Start = ShiftOp0;
- is16 = true;
+ if (!((Is32 && Test8) ||
+ (!Is32 && Len == 16 &&
+ (Test8 ||
+ match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+ m_And(m_Deferred(ShiftOp0),
+ m_SpecificInt(Mask00FF)))))))) {
+ return false;
}
Value *ShiftOp1;
// Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
- bool test4 =
- match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
- m_SpecificInt(Mask0F)),
- m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))));
-
- bool is8 = false;
- if ((is16 && test4)) {
- Start = ShiftOp1;
- is8 = true;
+ if (!match(ShiftOp0,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+ m_SpecificInt(Mask0F)),
+ m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))))) {
+ return false;
}
Value *ShiftOp2;
// Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
- if (is8 &&
- match(Start,
+ if (match(ShiftOp1,
m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
m_SpecificInt(Mask33)),
m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
>From 7e761ed69d19f472ba4e8fb0c60674781a737ded Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 12 Mar 2026 22:52:56 +0530
Subject: [PATCH 5/9] Add negative test cases.
---
.../AggressiveInstCombine/popcount.ll | 492 ++++++++++++++++++
1 file changed, 492 insertions(+)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f161b47428329..dfcaa89fd8cf2 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -503,3 +503,495 @@ define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
%add20 = add nuw nsw <2 x i64> %and18, %shr19
ret <2 x i64> %add20
}
+
+; Negative test cases - these should NOT be optimized to llvm.ctpop
+
+; NEGATIVE: i8 type - too small (Len <= 8), rejected by the bit-width check
+define i8 @popcnt1_8_negative(i8 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_8_negative(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[UWORD:%.*]], 85
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i8 [[SHR]], 85
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i8 [[ADD]], 51
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i8 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i8 [[SHR3]], 51
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i8 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i8 [[ADD5]], 15
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i8 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i8 [[SHR7]], 15
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i8 [[AND8]], [[AND6]]
+; CHECK-NEXT: ret i8 [[ADD9]]
+;
+ %and = and i8 %uWord, 85 ; 0x55
+ %shr = lshr i8 %uWord, 1
+ %and1 = and i8 %shr, 85
+ %add = add nuw i8 %and1, %and
+ %and2 = and i8 %add, 51 ; 0x33
+ %shr3 = lshr i8 %add, 2
+ %and4 = and i8 %shr3, 51
+ %add5 = add nuw nsw i8 %and4, %and2
+ %and6 = and i8 %add5, 15 ; 0x0F
+ %shr7 = lshr i8 %add5, 4
+ %and8 = and i8 %shr7, 15
+ %add9 = add nuw nsw i8 %and8, %and6
+ ret i8 %add9
+}
+
+; NEGATIVE: i128 type - too large (Len > 64), rejected by the bit-width check
+define i128 @popcnt1_128_negative(i128 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_128_negative(
+; CHECK-NEXT: [[AND:%.*]] = and i128 [[UWORD:%.*]], 113427455640312821154458202477256070485
+; CHECK-NEXT: [[SHR:%.*]] = lshr i128 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i128 [[SHR]], 113427455640312821154458202477256070485
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i128 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i128 [[ADD]], 68056473384187692692674921486353642291
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i128 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i128 [[SHR3]], 68056473384187692692674921486353642291
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i128 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i128 [[ADD5]], 20016609818878733144904388672456953615
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i128 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i128 [[SHR7]], 20016609818878733144904388672456953615
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i128 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i128 [[ADD9]], 1334440654591915542993625911497130241
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i128 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i128 [[SHR11]], 1334440654591915542993625911497130241
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i128 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i128 [[ADD13]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i128 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i128 [[SHR15]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i128 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i128 [[ADD17]], 79228162514264337593543950335
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i128 [[ADD17]], 32
+; CHECK-NEXT: [[AND20:%.*]] = and i128 [[SHR19]], 79228162514264337593543950335
+; CHECK-NEXT: [[ADD21:%.*]] = add nuw nsw i128 [[AND18]], [[AND20]]
+; CHECK-NEXT: [[AND22:%.*]] = and i128 [[ADD21]], -1
+; CHECK-NEXT: [[SHR23:%.*]] = lshr i128 [[ADD21]], 64
+; CHECK-NEXT: [[ADD24:%.*]] = add nuw nsw i128 [[AND22]], [[SHR23]]
+; CHECK-NEXT: ret i128 [[ADD24]]
+;
+ %and = and i128 %uWord, 113427455640312821154458202477256070485 ; 0x55555555555555555555555555555555
+ %shr = lshr i128 %uWord, 1
+ %and1 = and i128 %shr, 113427455640312821154458202477256070485
+ %add = add nuw i128 %and1, %and
+ %and2 = and i128 %add, 68056473384187692692674921486353642291 ; 0x33333333333333333333333333333333
+ %shr3 = lshr i128 %add, 2
+ %and4 = and i128 %shr3, 68056473384187692692674921486353642291
+ %add5 = add nuw nsw i128 %and4, %and2
+ %and6 = and i128 %add5, 20016609818878733144904388672456953615 ; 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
+ %shr7 = lshr i128 %add5, 4
+ %and8 = and i128 %shr7, 20016609818878733144904388672456953615
+ %add9 = add nuw nsw i128 %and8, %and6
+ %and10 = and i128 %add9, 1334440654591915542993625911497130241 ; 0x01010101010101010101010101010101 (NOTE: not the 0x00FF00FF... splat)
+ %shr11 = lshr i128 %add9, 8
+ %and12 = and i128 %shr11, 1334440654591915542993625911497130241
+ %add13 = add nuw nsw i128 %and12, %and10
+ %and14 = and i128 %add13, 5192296858534827628530496329220095 ; 0x0000FFFFFFFFFFFFFFFFFFFFFFFFFFFF (2^112 - 1; NOTE: not the 0x0000FFFF... splat)
+ %shr15 = lshr i128 %add13, 16
+ %and16 = and i128 %shr15, 5192296858534827628530496329220095
+ %add17 = add nuw nsw i128 %and16, %and14
+ %and18 = and i128 %add17, 79228162514264337593543950335 ; 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF (2^96 - 1; NOTE: not the 0x00000000FFFFFFFF splat)
+ %shr19 = lshr i128 %add17, 32
+ %and20 = and i128 %shr19, 79228162514264337593543950335
+ %add21 = add nuw nsw i128 %and18, %and20
+ %and22 = and i128 %add21, 340282366920938463463374607431768211455 ; 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF (all ones, i.e. -1; NOTE: not 0x0000000000000000FFFFFFFFFFFFFFFF)
+ %shr23 = lshr i128 %add21, 64
+ %add24 = add nuw nsw i128 %and22, %shr23
+ ret i128 %add24
+}
+
+; NEGATIVE: Missing the 0x55 mask step - incomplete pattern, so the matcher bails out
+define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_missing_step1(
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[UWORD:%.*]], 858993459
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[UWORD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i32 [[ADD5]], 252645135
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[SHR7]], 252645135
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD9]], 16711935
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: ret i32 [[ADD16]]
+;
+ ; Missing: %and = and i32 %uWord, 1431655765
+ ; Missing: %shr = lshr i32 %uWord, 1
+ ; Missing: %and1 = and i32 %shr, 1431655765
+ ; Missing: %add = add nuw i32 %and1, %and
+ ; Starting from step 2 (0x33 mask)
+ %and2 = and i32 %uWord, 858993459
+ %shr3 = lshr i32 %uWord, 2
+ %and4 = and i32 %shr3, 858993459
+ %add5 = add nuw nsw i32 %and4, %and2
+ %and6 = and i32 %add5, 252645135
+ %shr7 = lshr i32 %add5, 4
+ %and8 = and i32 %shr7, 252645135
+ %add9 = add nuw nsw i32 %and8, %and6
+ %and10 = and i32 %add9, 16711935
+ %shr11 = lshr i32 %add9, 8
+ %and12 = and i32 %shr11, 16711935
+ %add13 = add nuw nsw i32 %and12, %and10
+ %and14 = and i32 %add13, 65535
+ %shr15 = lshr i32 %add13, 16
+ %add16 = add nuw nsw i32 %and14, %shr15
+ ret i32 %add16
+}
+
+; NEGATIVE: Missing the 0x0F mask step - incomplete pattern, so the matcher bails out
+define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_missing_step3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[UWORD:%.*]], 1431655765
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ADD]], 858993459
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD5]], 16711935
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD5]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: ret i32 [[ADD16]]
+;
+ %and = and i32 %uWord, 1431655765
+ %shr = lshr i32 %uWord, 1
+ %and1 = and i32 %shr, 1431655765
+ %add = add nuw i32 %and1, %and
+ %and2 = and i32 %add, 858993459
+ %shr3 = lshr i32 %add, 2
+ %and4 = and i32 %shr3, 858993459
+ %add5 = add nuw nsw i32 %and4, %and2
+ ; Missing: %and6 = and i32 %add5, 252645135
+ ; Missing: %shr7 = lshr i32 %add5, 4
+ ; Missing: %and8 = and i32 %shr7, 252645135
+ ; Missing: %add9 = add nuw nsw i32 %and8, %and6
+ ; Jumping to step 4 (0x00FF00FF mask)
+ %and10 = and i32 %add5, 16711935
+ %shr11 = lshr i32 %add5, 8
+ %and12 = and i32 %shr11, 16711935
+ %add13 = add nuw nsw i32 %and12, %and10
+ %and14 = and i32 %add13, 65535
+ %shr15 = lshr i32 %add13, 16
+ %add16 = add nuw nsw i32 %and14, %shr15
+ ret i32 %add16
+}
+
+; NEGATIVE: Wrong mask value - using 0x44444444 instead of 0x55555555
+define i32 @popcnt1_32_wrong_mask(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_wrong_mask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[UWORD:%.*]], 1145324612
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 1145324612
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ADD]], 858993459
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i32 [[ADD5]], 252645135
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[SHR7]], 252645135
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD9]], 16711935
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: ret i32 [[ADD16]]
+;
+ %and = and i32 %uWord, 1145324612 ; 0x44444444 instead of 0x55555555
+ %shr = lshr i32 %uWord, 1
+ %and1 = and i32 %shr, 1145324612
+ %add = add nuw i32 %and1, %and
+ %and2 = and i32 %add, 858993459
+ %shr3 = lshr i32 %add, 2
+ %and4 = and i32 %shr3, 858993459
+ %add5 = add nuw nsw i32 %and4, %and2
+ %and6 = and i32 %add5, 252645135
+ %shr7 = lshr i32 %add5, 4
+ %and8 = and i32 %shr7, 252645135
+ %add9 = add nuw nsw i32 %and8, %and6
+ %and10 = and i32 %add9, 16711935
+ %shr11 = lshr i32 %add9, 8
+ %and12 = and i32 %shr11, 16711935
+ %add13 = add nuw nsw i32 %and12, %and10
+ %and14 = and i32 %add13, 65535
+ %shr15 = lshr i32 %add13, 16
+ %add16 = add nuw nsw i32 %and14, %shr15
+ ret i32 %add16
+}
+
+; NEGATIVE: 64-bit - Missing the 0x55 mask step - incomplete pattern
+define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_missing_step1(
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[UWORD:%.*]], 3689348814741910323
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[UWORD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: ret i64 [[ADD20]]
+;
+ ; Missing: %and = and i64 %uWord, 6148914691236517205
+ ; Missing: %shr = lshr i64 %uWord, 1
+ ; Missing: %and1 = and i64 %shr, 6148914691236517205
+ ; Missing: %add = add nuw i64 %and1, %and
+ ; Starting from step 2 (0x33 mask)
+ %and2 = and i64 %uWord, 3689348814741910323
+ %shr3 = lshr i64 %uWord, 2
+ %and4 = and i64 %shr3, 3689348814741910323
+ %add5 = add nuw nsw i64 %and4, %and2
+ %and6 = and i64 %add5, 1085102592571150095
+ %shr7 = lshr i64 %add5, 4
+ %and8 = and i64 %shr7, 1085102592571150095
+ %add9 = add nuw nsw i64 %and8, %and6
+ %and10 = and i64 %add9, 71777214294589695
+ %shr11 = lshr i64 %add9, 8
+ %and12 = and i64 %shr11, 71777214294589695
+ %add13 = add nuw nsw i64 %and12, %and10
+ %and14 = and i64 %add13, 281470681808895
+ %shr15 = lshr i64 %add13, 16
+ %and16 = and i64 %shr15, 281470681808895
+ %add17 = add nuw nsw i64 %and16, %and14
+ %and18 = and i64 %add17, 4294967295
+ %shr19 = lshr i64 %add17, 32
+ %add20 = add nuw nsw i64 %and18, %shr19
+ ret i64 %add20
+}
+
+; NEGATIVE: 64-bit - Missing the 0x0F mask step - incomplete pattern
+define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_missing_step3(
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[UWORD:%.*]], 6148914691236517205
+; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 6148914691236517205
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD5]], 71777214294589695
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD5]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: ret i64 [[ADD20]]
+;
+ %and = and i64 %uWord, 6148914691236517205
+ %shr = lshr i64 %uWord, 1
+ %and1 = and i64 %shr, 6148914691236517205
+ %add = add nuw i64 %and1, %and
+ %and2 = and i64 %add, 3689348814741910323
+ %shr3 = lshr i64 %add, 2
+ %and4 = and i64 %shr3, 3689348814741910323
+ %add5 = add nuw nsw i64 %and4, %and2
+ ; Missing: %and6 = and i64 %add5, 1085102592571150095
+ ; Missing: %shr7 = lshr i64 %add5, 4
+ ; Missing: %and8 = and i64 %shr7, 1085102592571150095
+ ; Missing: %add9 = add nuw nsw i64 %and8, %and6
+ ; Jumping to step 4 (0x00FF00FF mask)
+ %and10 = and i64 %add5, 71777214294589695
+ %shr11 = lshr i64 %add5, 8
+ %and12 = and i64 %shr11, 71777214294589695
+ %add13 = add nuw nsw i64 %and12, %and10
+ %and14 = and i64 %add13, 281470681808895
+ %shr15 = lshr i64 %add13, 16
+ %and16 = and i64 %shr15, 281470681808895
+ %add17 = add nuw nsw i64 %and16, %and14
+ %and18 = and i64 %add17, 4294967295
+ %shr19 = lshr i64 %add17, 32
+ %add20 = add nuw nsw i64 %and18, %shr19
+ ret i64 %add20
+}
+
+; NEGATIVE: 64-bit - Wrong mask value - using 0x4444444444444444 instead of 0x5555555555555555
+define i64 @popcnt1_64_wrong_mask(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_wrong_mask(
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[UWORD:%.*]], 4919131752989213764
+; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 4919131752989213764
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: ret i64 [[ADD20]]
+;
+ %and = and i64 %uWord, 4919131752989213764 ; 0x4444444444444444 instead of 0x5555555555555555
+ %shr = lshr i64 %uWord, 1
+ %and1 = and i64 %shr, 4919131752989213764
+ %add = add nuw i64 %and1, %and
+ %and2 = and i64 %add, 3689348814741910323
+ %shr3 = lshr i64 %add, 2
+ %and4 = and i64 %shr3, 3689348814741910323
+ %add5 = add nuw nsw i64 %and4, %and2
+ %and6 = and i64 %add5, 1085102592571150095
+ %shr7 = lshr i64 %add5, 4
+ %and8 = and i64 %shr7, 1085102592571150095
+ %add9 = add nuw nsw i64 %and8, %and6
+ %and10 = and i64 %add9, 71777214294589695
+ %shr11 = lshr i64 %add9, 8
+ %and12 = and i64 %shr11, 71777214294589695
+ %add13 = add nuw nsw i64 %and12, %and10
+ %and14 = and i64 %add13, 281470681808895
+ %shr15 = lshr i64 %add13, 16
+ %and16 = and i64 %shr15, 281470681808895
+ %add17 = add nuw nsw i64 %and16, %and14
+ %and18 = and i64 %add17, 4294967295
+ %shr19 = lshr i64 %add17, 32
+ %add20 = add nuw nsw i64 %and18, %shr19
+ ret i64 %add20
+}
+
+; NEGATIVE: 16-bit - Missing the 0x55 mask step - incomplete pattern
+define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_missing_step1(
+; CHECK-NEXT: [[AND2:%.*]] = and i16 [[UWORD:%.*]], 13107
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[UWORD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i16 [[ADD5]], 3855
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i16 [[SHR7]], 3855
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD9]], 255
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: ret i16 [[ADD13]]
+;
+ ; Missing: %and = and i16 %uWord, 21845
+ ; Missing: %shr = lshr i16 %uWord, 1
+ ; Missing: %and1 = and i16 %shr, 21845
+ ; Missing: %add = add nuw i16 %and1, %and
+ ; Starting from step 2 (0x33 mask)
+ %and2 = and i16 %uWord, 13107
+ %shr3 = lshr i16 %uWord, 2
+ %and4 = and i16 %shr3, 13107
+ %add5 = add nuw nsw i16 %and4, %and2
+ %and6 = and i16 %add5, 3855
+ %shr7 = lshr i16 %add5, 4
+ %and8 = and i16 %shr7, 3855
+ %add9 = add nuw nsw i16 %and8, %and6
+ %and10 = and i16 %add9, 255
+ %shr11 = lshr i16 %add9, 8
+ %add13 = add nuw nsw i16 %shr11, %and10
+ ret i16 %add13
+}
+
+; NEGATIVE: 16-bit - Missing the 0x0F mask step - incomplete pattern
+define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_missing_step3(
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[UWORD:%.*]], 21845
+; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i16 [[SHR]], 21845
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i16 [[ADD]], 13107
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD5]], 255
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD5]], 8
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: ret i16 [[ADD13]]
+;
+ %and = and i16 %uWord, 21845
+ %shr = lshr i16 %uWord, 1
+ %and1 = and i16 %shr, 21845
+ %add = add nuw i16 %and1, %and
+ %and2 = and i16 %add, 13107
+ %shr3 = lshr i16 %add, 2
+ %and4 = and i16 %shr3, 13107
+ %add5 = add nuw nsw i16 %and4, %and2
+ ; Missing: %and6 = and i16 %add5, 3855
+ ; Missing: %shr7 = lshr i16 %add5, 4
+ ; Missing: %and8 = and i16 %shr7, 3855
+ ; Missing: %add9 = add nuw nsw i16 %and8, %and6
+ ; Jumping to step 4 (0x00FF mask)
+ %and10 = and i16 %add5, 255
+ %shr11 = lshr i16 %add5, 8
+ %add13 = add nuw nsw i16 %shr11, %and10
+ ret i16 %add13
+}
+
+; NEGATIVE: 16-bit - Wrong mask value - using 0x4444 instead of 0x5555
+define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_wrong_mask(
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[UWORD:%.*]], 17476
+; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i16 [[SHR]], 17476
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i16 [[ADD]], 13107
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i16 [[ADD5]], 3855
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i16 [[SHR7]], 3855
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD9]], 255
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: ret i16 [[ADD13]]
+;
+ %and = and i16 %uWord, 17476 ; 0x4444 instead of 0x5555
+ %shr = lshr i16 %uWord, 1
+ %and1 = and i16 %shr, 17476
+ %add = add nuw i16 %and1, %and
+ %and2 = and i16 %add, 13107
+ %shr3 = lshr i16 %add, 2
+ %and4 = and i16 %shr3, 13107
+ %add5 = add nuw nsw i16 %and4, %and2
+ %and6 = and i16 %add5, 3855
+ %shr7 = lshr i16 %add5, 4
+ %and8 = and i16 %shr7, 3855
+ %add9 = add nuw nsw i16 %and8, %and6
+ %and10 = and i16 %add9, 255
+ %shr11 = lshr i16 %add9, 8
+ %add13 = add nuw nsw i16 %shr11, %and10
+ ret i16 %add13
+}
>From 52917117be6236481185aba8d15e66073de2c18d Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 23 Mar 2026 11:43:28 +0530
Subject: [PATCH 6/9] Implement code review comments
---
.../AggressiveInstCombine.cpp | 132 +++++++-----------
1 file changed, 53 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 948e09674293b..1dd0d8fd79064 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -373,7 +373,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
}
// Try to recognize below function as popcount intrinsic.
-// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Ref. Hacker's Delight
// Also used in TargetLowering::expandCTPOP().
//
// int popcount32(unsigned int i) {
@@ -388,7 +388,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
// uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
// uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
// uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
-// return (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
// return (uWord & 0x00000000FFFFFFFF) + (uWord>>32) & 0x00000000FFFFFFFF;
// }
static bool tryToRecognizePopCount1(Instruction &I) {
@@ -400,7 +400,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
return false;
unsigned Len = Ty->getScalarSizeInBits();
- if (!(Len <= 64 && Len > 8 && Len % 8 == 0))
+ if (Len > 64 || Len <= 8 || Len % 8 != 0)
return false;
APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
@@ -420,90 +420,64 @@ static bool tryToRecognizePopCount1(Instruction &I) {
}
APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
- // Matching "(uWord & 0x00000000FFFFFFFF) + (uWord>>32)".
- // OR
- // Matching "(uWord & 0x00000000FFFFFFFF) + ((uWord>>32) &
- // 0x00000000FFFFFFFF)".
Value *ShiftOp;
Value *Start = &I;
- bool Is64 = false;
- if (match(Start,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
- m_SpecificInt(Mask64)),
- m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64)))) ||
- match(Start,
- m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
- m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
+ APInt Mask;
+ for (unsigned I = Len; I >= 8; I = I / 2) {
+ switch (I) {
+ case 64:
+ Mask = Mask64;
+ break;
+ case 32:
+ Mask = Mask0000FFFF;
+ break;
+ case 16:
+ Mask = Mask00FF;
+ break;
+ case 8:
+ Mask = Mask0F;
+ break;
+ }
+ // Matching "(uWord & Mask) + (uWord>>I/2)".
+ // OR
+ // Matching "(uWord & Mask) + ((uWord>>I/2) &
+ // Mask)".
+ if (Len >= I &&
+ !(match(Start,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+ m_SpecificInt(Mask)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
+ match(Start,
+ m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask))))))
+ return false;
+
Start = ShiftOp;
- Is64 = true;
- }
- Value *LShrOp0;
- // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
- // Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
- bool Test16 = match(
- Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
- m_SpecificInt(Mask0000FFFF)),
- m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
-
- bool Is32 = false;
- if ((Is64 && Test16) ||
- (!Is64 && Len == 32 &&
- (Test16 ||
- match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
- m_And(m_Deferred(LShrOp0),
- m_SpecificInt(Mask0000FFFF))))))) {
- Start = LShrOp0;
- Is32 = true;
- }
- Value *ShiftOp0;
- // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
- // OR
- // Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8) ;".
- bool Test8 = match(
- Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
- m_SpecificInt(Mask00FF)),
- m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
- if (!((Is32 && Test8) ||
- (!Is32 && Len == 16 &&
- (Test8 ||
- match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
- m_And(m_Deferred(ShiftOp0),
- m_SpecificInt(Mask00FF)))))))) {
- return false;
+ ShiftOp = nullptr;
}
- Value *ShiftOp1;
- // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
- if (!match(ShiftOp0,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
- m_SpecificInt(Mask0F)),
- m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))))) {
+ ShiftOp = nullptr;
+ // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+ if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
+ m_SpecificInt(Mask33)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask33)))))
return false;
- }
- Value *ShiftOp2;
- // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
- if (match(ShiftOp1,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
- m_SpecificInt(Mask33)),
- m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
- Value *ShiftOp3;
- // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
- // 0x55555555)".
- if (match(ShiftOp2,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
- m_SpecificInt(Mask55)),
- m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(
- Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp3}));
- ++NumPopCountRecognized;
- return true;
- }
- }
+ Start = ShiftOp;
+ ShiftOp = nullptr;
+ // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+ // 0x55555555)".
+ if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
+ m_SpecificInt(Mask55)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))
+ return false;
- return false;
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(
+ Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp}));
+ ++NumPopCountRecognized;
+ return true;
}
/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
>From a5707417f1ed783fb31c5db5ad12d290576dfc79 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 24 Mar 2026 18:05:50 +0530
Subject: [PATCH 7/9] Add the power of 2 check
---
.../AggressiveInstCombine.cpp | 4 +
.../AggressiveInstCombine/popcount.ll | 208 ++++++++++++++++++
2 files changed, 212 insertions(+)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 1dd0d8fd79064..41a138974a4ee 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -403,6 +403,10 @@ static bool tryToRecognizePopCount1(Instruction &I) {
if (Len > 64 || Len <= 8 || Len % 8 != 0)
return false;
+ // Len must be a power of 2: the loop below halves it each step down to 8.
+ if (!isPowerOf2_32(Len))
+ return false;
+
APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index dfcaa89fd8cf2..21167abbcef8f 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -995,3 +995,211 @@ define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
%add13 = add nuw nsw i16 %shr11, %and10
ret i16 %add13
}
+
+; NEGATIVE: 24-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i24 @popcnt1_negative_i24(i24 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i24(
+; CHECK-NEXT: [[AND:%.*]] = and i24 [[UWORD:%.*]], 5592405
+; CHECK-NEXT: [[SHR:%.*]] = lshr i24 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i24 [[SHR]], 5592405
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i24 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i24 [[ADD]], 3355443
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i24 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i24 [[SHR3]], 3355443
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i24 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i24 [[ADD5]], 986895
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i24 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i24 [[SHR7]], 986895
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i24 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i24 [[ADD9]], 65535
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i24 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i24 [[SHR11]], 65535
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i24 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i24 [[ADD13]], 255
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i24 [[ADD13]], 16
+; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i24 [[SHR15]], [[AND14]]
+; CHECK-NEXT: ret i24 [[ADD16]]
+;
+ %and = and i24 %uWord, 5592405 ; 0x555555
+ %shr = lshr i24 %uWord, 1
+ %and1 = and i24 %shr, 5592405
+ %add = add nuw i24 %and1, %and
+ %and2 = and i24 %add, 3355443 ; 0x333333
+ %shr3 = lshr i24 %add, 2
+ %and4 = and i24 %shr3, 3355443
+ %add5 = add nuw nsw i24 %and4, %and2
+ %and6 = and i24 %add5, 986895 ; 0x0F0F0F
+ %shr7 = lshr i24 %add5, 4
+ %and8 = and i24 %shr7, 986895
+ %add9 = add nuw nsw i24 %and8, %and6
+ %and10 = and i24 %add9, 65535 ; 0x00FFFF
+ %shr11 = lshr i24 %add9, 8
+ %and12 = and i24 %shr11, 65535
+ %add13 = add nuw nsw i24 %and12, %and10
+ %and14 = and i24 %add13, 255 ; 0x0000FF
+ %shr15 = lshr i24 %add13, 16
+ %add16 = add nuw nsw i24 %shr15, %and14
+ ret i24 %add16
+}
+
+; NEGATIVE: 40-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i40 @popcnt1_negative_i40(i40 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i40(
+; CHECK-NEXT: [[AND:%.*]] = and i40 [[UWORD:%.*]], 366503875925
+; CHECK-NEXT: [[SHR:%.*]] = lshr i40 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i40 [[SHR]], 366503875925
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i40 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i40 [[ADD]], 219902325555
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i40 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i40 [[SHR3]], 219902325555
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i40 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i40 [[ADD5]], 67818775935
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i40 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i40 [[SHR7]], 67818775935
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i40 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i40 [[ADD9]], 280371153695
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i40 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i40 [[SHR11]], 280371153695
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i40 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i40 [[ADD13]], -1
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i40 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i40 [[SHR15]], -1
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i40 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i40 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i40 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i40 [[SHR19]], [[AND18]]
+; CHECK-NEXT: ret i40 [[ADD20]]
+;
+ %and = and i40 %uWord, 366503875925 ; 0x5555555555
+ %shr = lshr i40 %uWord, 1
+ %and1 = and i40 %shr, 366503875925
+ %add = add nuw i40 %and1, %and
+ %and2 = and i40 %add, 219902325555 ; 0x3333333333
+ %shr3 = lshr i40 %add, 2
+ %and4 = and i40 %shr3, 219902325555
+ %add5 = add nuw nsw i40 %and4, %and2
+ %and6 = and i40 %add5, 67818775935 ; 0xFCA50657F (NOTE: not the 0x0F0F0F0F0F splat)
+ %shr7 = lshr i40 %add5, 4
+ %and8 = and i40 %shr7, 67818775935
+ %add9 = add nuw nsw i40 %and8, %and6
+ %and10 = and i40 %add9, 280371153695 ; 0x41476C4B1F (NOTE: not a 0x00FF splat)
+ %shr11 = lshr i40 %add9, 8
+ %and12 = and i40 %shr11, 280371153695
+ %add13 = add nuw nsw i40 %and12, %and10
+ %and14 = and i40 %add13, 1099511627775 ; 0xFFFFFFFFFF (all ones in i40, not a 0x0000FFFF splat)
+ %shr15 = lshr i40 %add13, 16
+ %and16 = and i40 %shr15, 1099511627775
+ %add17 = add nuw nsw i40 %and16, %and14
+ %and18 = and i40 %add17, 4294967295 ; 0x00000000FFFFFFFF
+ %shr19 = lshr i40 %add17, 32
+ %add20 = add nuw nsw i40 %shr19, %and18
+ ret i40 %add20
+}
+
+; NEGATIVE: 48-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i48 @popcnt1_negative_i48(i48 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i48(
+; CHECK-NEXT: [[AND:%.*]] = and i48 [[UWORD:%.*]], 93824992236885
+; CHECK-NEXT: [[SHR:%.*]] = lshr i48 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i48 [[SHR]], 93824992236885
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i48 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i48 [[ADD]], 56294995342131
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i48 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i48 [[SHR3]], 56294995342131
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i48 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i48 [[ADD5]], 17361641481615
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i48 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i48 [[SHR7]], 17361641481615
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i48 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i48 [[ADD9]], 71777214294895
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i48 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i48 [[SHR11]], 71777214294895
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i48 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i48 [[ADD13]], -4294967041
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i48 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i48 [[SHR15]], -4294967041
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i48 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i48 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i48 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i48 [[SHR19]], [[AND18]]
+; CHECK-NEXT: ret i48 [[ADD20]]
+;
+ %and = and i48 %uWord, 93824992236885 ; 0x555555555555
+ %shr = lshr i48 %uWord, 1
+ %and1 = and i48 %shr, 93824992236885
+ %add = add nuw i48 %and1, %and
+ %and2 = and i48 %add, 56294995342131 ; 0x333333333333
+ %shr3 = lshr i48 %add, 2
+ %and4 = and i48 %shr3, 56294995342131
+ %add5 = add nuw nsw i48 %and4, %and2
+ %and6 = and i48 %add5, 17361641481615 ; 0x0FCA5279258F (NOTE: not the 0x0F0F0F0F0F0F splat)
+ %shr7 = lshr i48 %add5, 4
+ %and8 = and i48 %shr7, 17361641481615
+ %add9 = add nuw nsw i48 %and8, %and6
+ %and10 = and i48 %add9, 71777214294895 ; 0x4147EF5C6B6F (NOTE: not the 0x00FF00FF00FF splat)
+ %shr11 = lshr i48 %add9, 8
+ %and12 = and i48 %shr11, 71777214294895
+ %add13 = add nuw nsw i48 %and12, %and10
+ %and14 = and i48 %add13, 281470681743615 ; 0xFFFF000000FF (NOTE: not a 0x0000FFFF splat)
+ %shr15 = lshr i48 %add13, 16
+ %and16 = and i48 %shr15, 281470681743615
+ %add17 = add nuw nsw i48 %and16, %and14
+ %and18 = and i48 %add17, 4294967295 ; 0x00000000FFFFFFFF
+ %shr19 = lshr i48 %add17, 32
+ %add20 = add nuw nsw i48 %shr19, %and18
+ ret i48 %add20
+}
+
+; NEGATIVE: 56-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i56 @popcnt1_negative_i56(i56 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i56(
+; CHECK-NEXT: [[AND:%.*]] = and i56 [[UWORD:%.*]], 24019198012642645
+; CHECK-NEXT: [[SHR:%.*]] = lshr i56 [[UWORD]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i56 [[SHR]], 24019198012642645
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i56 [[AND1]], [[AND]]
+; CHECK-NEXT: [[AND2:%.*]] = and i56 [[ADD]], 14411518807585587
+; CHECK-NEXT: [[SHR3:%.*]] = lshr i56 [[ADD]], 2
+; CHECK-NEXT: [[AND4:%.*]] = and i56 [[SHR3]], 14411518807585587
+; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i56 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[AND6:%.*]] = and i56 [[ADD5]], 4444132406286415
+; CHECK-NEXT: [[SHR7:%.*]] = lshr i56 [[ADD5]], 4
+; CHECK-NEXT: [[AND8:%.*]] = and i56 [[SHR7]], 4444132406286415
+; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i56 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[AND10:%.*]] = and i56 [[ADD9]], 18374686479671535
+; CHECK-NEXT: [[SHR11:%.*]] = lshr i56 [[ADD9]], 8
+; CHECK-NEXT: [[AND12:%.*]] = and i56 [[SHR11]], 18374686479671535
+; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i56 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[AND14:%.*]] = and i56 [[ADD13]], -4278190081
+; CHECK-NEXT: [[SHR15:%.*]] = lshr i56 [[ADD13]], 16
+; CHECK-NEXT: [[AND16:%.*]] = and i56 [[SHR15]], -4278190081
+; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i56 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[AND18:%.*]] = and i56 [[ADD17]], 4294967295
+; CHECK-NEXT: [[SHR19:%.*]] = lshr i56 [[ADD17]], 32
+; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i56 [[SHR19]], [[AND18]]
+; CHECK-NEXT: ret i56 [[ADD20]]
+;
+ %and = and i56 %uWord, 24019198012642645 ; 0x55555555555555
+ %shr = lshr i56 %uWord, 1
+ %and1 = and i56 %shr, 24019198012642645
+ %add = add nuw i56 %and1, %and
+ %and2 = and i56 %add, 14411518807585587 ; 0x33333333333333
+ %shr3 = lshr i56 %add, 2
+ %and4 = and i56 %shr3, 14411518807585587
+ %add5 = add nuw nsw i56 %and4, %and2
+ %and6 = and i56 %add5, 4444132406286415 ; 0x0FC9EA3569584F (NOTE: not the 0x0F0F0F0F0F0F0F splat)
+ %shr7 = lshr i56 %add5, 4
+ %and8 = and i56 %shr7, 4444132406286415
+ %add9 = add nuw nsw i56 %and8, %and6
+ %and10 = and i56 %add9, 18374686479671535 ; 0x4147AE147AE0EF (NOTE: not a 0x00FF splat)
+ %shr11 = lshr i56 %add9, 8
+ %and12 = and i56 %shr11, 18374686479671535
+ %add13 = add nuw nsw i56 %and12, %and10
+ %and14 = and i56 %add13, 72057589759737855 ; 0xFFFFFF00FFFFFF (NOTE: not a 0x0000FFFF splat)
+ %shr15 = lshr i56 %add13, 16
+ %and16 = and i56 %shr15, 72057589759737855
+ %add17 = add nuw nsw i56 %and16, %and14
+ %and18 = and i56 %add17, 4294967295 ; 0x00000000FFFFFFFF
+ %shr19 = lshr i56 %add17, 32
+ %add20 = add nuw nsw i56 %shr19, %and18
+ ret i56 %add20
+}
>From 35974f47215858098416cbbfbfc76905cdfe688c Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 31 Mar 2026 11:53:21 +0530
Subject: [PATCH 8/9] Implement the Splat review comments for efficient Mask
calculation
---
.../AggressiveInstCombine.cpp | 43 +++----------------
1 file changed, 7 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 41a138974a4ee..a0d5f8ad87696 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -388,7 +388,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
// uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
// uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
// uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
-// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
// return (uWord & 0x00000000FFFFFFFF) + ((uWord>>32) & 0x00000000FFFFFFFF);
// }
static bool tryToRecognizePopCount1(Instruction &I) {
@@ -409,45 +409,16 @@ static bool tryToRecognizePopCount1(Instruction &I) {
APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
- APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
- APInt Mask00FF;
- if (Len == 16) {
- Mask00FF = APInt(16, 0x00FF);
- } else {
- Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
- }
- APInt Mask0000FFFF;
- if (Len <= 32) {
- Mask0000FFFF = APInt(32, 0x0000FFFF);
- } else {
- Mask0000FFFF = APInt::getSplat(Len, APInt(32, 0x0000FFFF));
- }
- APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
Value *ShiftOp;
Value *Start = &I;
- APInt Mask;
for (unsigned I = Len; I >= 8; I = I / 2) {
- switch (I) {
- case 64:
- Mask = Mask64;
- break;
- case 32:
- Mask = Mask0000FFFF;
- break;
- case 16:
- Mask = Mask00FF;
- break;
- case 8:
- Mask = Mask0F;
- break;
- }
+ APInt Mask = APInt::getSplat(Len, APInt::getLowBitsSet(I, I / 2));
// Matching "(uWord & Mask) + (uWord>>I/2)".
// OR
// Matching "(uWord & Mask) + ((uWord>>I/2) &
- // 0x00000000FFFFFFFF)".
- if (Len >= I &&
- !(match(Start,
+ // Mask)".
+ if (!(match(Start,
m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
m_SpecificInt(Mask)),
m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
@@ -461,7 +432,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
}
ShiftOp = nullptr;
- // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+ // Matching "uWord = (uWord & Mask33) + ((uWord>>2) & Mask33)".
if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
m_SpecificInt(Mask33)),
m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask33)))))
@@ -469,8 +440,8 @@ static bool tryToRecognizePopCount1(Instruction &I) {
Start = ShiftOp;
ShiftOp = nullptr;
- // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
- // 0x55555555)".
+ // Matching "uWord = (uWord & Mask55) + ((uWord>>1) &
+ // Mask55)".
if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
m_SpecificInt(Mask55)),
m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))
>From 7e0eeaa77ee7955033d659c201876456848a2b57 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 1 Apr 2026 12:45:12 +0530
Subject: [PATCH 9/9] Implement more review comments
---
.../AggressiveInstCombine.cpp | 46 +-
.../AggressiveInstCombine/popcount.ll | 399 +++++++++---------
2 files changed, 219 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index a0d5f8ad87696..885b4d3da2454 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -288,6 +288,16 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
return true;
}
+/// Helper function to replace an instruction with a popcount intrinsic.
+/// This creates the ctpop intrinsic and replaces all uses of the instruction.
+static void replaceWithPopCount(Instruction &I, Value *Root) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ I.replaceAllUsesWith(
+ Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
+ ++NumPopCountRecognized;
+}
+
// Try to recognize below function as popcount intrinsic.
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -357,11 +367,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
};
if (CheckAndMask()) {
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(
- Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
- ++NumPopCountRecognized;
+ replaceWithPopCount(I, Root);
return true;
}
}
@@ -374,8 +380,6 @@ static bool tryToRecognizePopCount(Instruction &I) {
// Try to recognize below function as popcount intrinsic.
// Ref. Hacker's Delight
-// Also used in TargetLowering::expandCTPOP().
-//
// int popcount32(unsigned int i) {
// uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
// uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
@@ -418,20 +422,16 @@ static bool tryToRecognizePopCount1(Instruction &I) {
// OR
// Matching "(uWord & Mask) + ((uWord>>I/2) &
// Mask)".
- if (!(match(Start,
- m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
- m_SpecificInt(Mask)),
- m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
- match(Start,
- m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
- m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask))))))
+ if (!match(Start,
+ m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+ m_SpecificInt(Mask)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) &&
+ !match(Start, m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+ m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))))
return false;
-
Start = ShiftOp;
- ShiftOp = nullptr;
}
- ShiftOp = nullptr;
// Matching "uWord = (uWord & Mask33) + ((uWord>>2) & Mask33)".
if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
m_SpecificInt(Mask33)),
@@ -439,19 +439,15 @@ static bool tryToRecognizePopCount1(Instruction &I) {
return false;
Start = ShiftOp;
- ShiftOp = nullptr;
// Matching "uWord = (uWord & Mask55) + ((uWord>>1) &
// Mask55)".
- if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
+ Value *Root;
+ if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(Root), m_SpecificInt(1)),
m_SpecificInt(Mask55)),
- m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))
+ m_And(m_Deferred(Root), m_SpecificInt(Mask55)))))
return false;
- LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
- IRBuilder<> Builder(&I);
- I.replaceAllUsesWith(
- Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp}));
- ++NumPopCountRecognized;
+ replaceWithPopCount(I, Root);
return true;
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 21167abbcef8f..9cdfdfaacb4bf 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -240,10 +240,9 @@ define i32 @popcount64_mask(i64 %x) {
ret i32 %13
}
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(
-; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define i32 @popcnt1_32(i32 noundef %uWord) {
+; CHECK-LABEL: define i32 @popcnt1_32(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
; CHECK-NEXT: ret i32 [[TMP0]]
@@ -252,29 +251,28 @@ entry:
%and = and i32 %uWord, 1431655765
%shr = lshr i32 %uWord, 1
%and1 = and i32 %shr, 1431655765
- %add = add nuw i32 %and1, %and
+ %add = add i32 %and1, %and
%and2 = and i32 %add, 858993459
%shr3 = lshr i32 %add, 2
%and4 = and i32 %shr3, 858993459
- %add5 = add nuw nsw i32 %and4, %and2
+ %add5 = add i32 %and4, %and2
%and6 = and i32 %add5, 252645135
%shr7 = lshr i32 %add5, 4
%and8 = and i32 %shr7, 252645135
- %add9 = add nuw nsw i32 %and8, %and6
+ %add9 = add i32 %and8, %and6
%and10 = and i32 %add9, 16711935
%shr11 = lshr i32 %add9, 8
%and12 = and i32 %shr11, 16711935
- %add13 = add nuw nsw i32 %and12, %and10
+ %add13 = add i32 %and12, %and10
%and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
- %add16 = add nuw nsw i32 %and14, %shr15
+ %add16 = add i32 %and14, %shr15
ret i32 %add16
}
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(
-; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define i32 @popcnt1_32_variant2(i32 noundef %uWord) {
+; CHECK-LABEL: define i32 @popcnt1_32_variant2(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
; CHECK-NEXT: ret i32 [[TMP0]]
@@ -283,30 +281,29 @@ entry:
%and = and i32 %uWord, 1431655765
%shr = lshr i32 %uWord, 1
%and1 = and i32 %shr, 1431655765
- %add = add nuw i32 %and1, %and
+ %add = add i32 %and1, %and
%and2 = and i32 %add, 858993459
%shr3 = lshr i32 %add, 2
%and4 = and i32 %shr3, 858993459
- %add5 = add nuw nsw i32 %and4, %and2
+ %add5 = add i32 %and4, %and2
%and6 = and i32 %add5, 252645135
%shr7 = lshr i32 %add5, 4
%and8 = and i32 %shr7, 252645135
- %add9 = add nuw nsw i32 %and8, %and6
+ %add9 = add i32 %and8, %and6
%and10 = and i32 %add9, 16711935
%shr11 = lshr i32 %add9, 8
%and12 = and i32 %shr11, 16711935
- %add13 = add nuw nsw i32 %and12, %and10
+ %add13 = add i32 %and12, %and10
%and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
%and16 = and i32 %shr15, 65535
- %add17 = add nuw nsw i32 %and14, %and16
+ %add17 = add i32 %and14, %and16
ret i32 %add17
}
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef i64 @popcnt1_64(i64 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef i64 @popcnt1_64(
-; CHECK-SAME: i64 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define i64 @popcnt1_64(i64 noundef %uWord) {
+; CHECK-LABEL: define i64 @popcnt1_64(
+; CHECK-SAME: i64 noundef [[UWORD:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD]])
; CHECK-NEXT: ret i64 [[TMP0]]
@@ -315,26 +312,26 @@ entry:
%and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
%shr = lshr i64 %uWord, 1
%and1 = and i64 %shr, 6148914691236517205
- %add = add nuw i64 %and1, %and
+ %add = add i64 %and1, %and
%and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
%shr3 = lshr i64 %add, 2
%and4 = and i64 %shr3, 3689348814741910323
- %add5 = add nuw nsw i64 %and4, %and2
+ %add5 = add i64 %and4, %and2
%and6 = and i64 %add5, 1085102592571150095 ; 0F0F0F0F0F0F0F0F
%shr7 = lshr i64 %add5, 4
%and8 = and i64 %shr7, 1085102592571150095
- %add9 = add nuw nsw i64 %and8, %and6
+ %add9 = add i64 %and8, %and6
%and10 = and i64 %add9, 71777214294589695 ; 00FF00FF00FF00FF
%shr11 = lshr i64 %add9, 8
%and12 = and i64 %shr11, 71777214294589695
- %add13 = add nuw nsw i64 %and12, %and10
+ %add13 = add i64 %and12, %and10
%and14 = and i64 %add13, 281470681808895 ; 0000FFFF0000FFFF
%shr15 = lshr i64 %add13, 16
%and16 = and i64 %shr15, 281470681808895
- %add17 = add nuw nsw i64 %and16, %and14
+ %add17 = add i64 %and16, %and14
%and18 = and i64 %add17, 4294967295 ; 00000000FFFFFFFF
%shr19 = lshr i64 %add17, 32
- %add20 = add nuw nsw i64 %and18, %shr19
+ %add20 = add i64 %and18, %shr19
ret i64 %add20
}
@@ -347,18 +344,18 @@ define i16 @popcnt1_16(i16 noundef %uWord) {
%and = and i16 %uWord, 21845 ; 0x5555
%shr = lshr i16 %uWord, 1
%and1 = and i16 %shr, 21845
- %add = add nuw i16 %and1, %and
+ %add = add i16 %and1, %and
%and2 = and i16 %add, 13107 ; 0x3333
%shr3 = lshr i16 %add, 2
%and4 = and i16 %shr3, 13107
- %add5 = add nuw nsw i16 %and4, %and2
+ %add5 = add i16 %and4, %and2
%and6 = and i16 %add5, 3855 ; 0x0F0F
%shr7 = lshr i16 %add5, 4
%and8 = and i16 %shr7, 3855
- %add9 = add nuw nsw i16 %and8, %and6
+ %add9 = add i16 %and8, %and6
%and10 = and i16 %add9, 255 ; 0x00FF
%shr11 = lshr i16 %add9, 8
- %add13 = add nuw nsw i16 %shr11, %and10
+ %add13 = add i16 %shr11, %and10
ret i16 %add13
}
@@ -371,19 +368,19 @@ define i16 @popcnt1_16_variant2(i16 noundef %uWord) {
%and = and i16 %uWord, 21845 ; 0x5555
%shr = lshr i16 %uWord, 1
%and1 = and i16 %shr, 21845
- %add = add nuw i16 %and1, %and
+ %add = add i16 %and1, %and
%and2 = and i16 %add, 13107 ; 0x3333
%shr3 = lshr i16 %add, 2
%and4 = and i16 %shr3, 13107
- %add5 = add nuw nsw i16 %and4, %and2
+ %add5 = add i16 %and4, %and2
%and6 = and i16 %add5, 3855 ; 0x0F0F
%shr7 = lshr i16 %add5, 4
%and8 = and i16 %shr7, 3855
- %add9 = add nuw nsw i16 %and8, %and6
+ %add9 = add i16 %and8, %and6
%and10 = and i16 %add9, 255 ; 0x00FF
%shr11 = lshr i16 %add9, 8
%and12 = and i16 %shr11, 255
- %add13 = add nuw nsw i16 %and10, %and12
+ %add13 = add i16 %and10, %and12
ret i16 %add13
}
@@ -396,18 +393,18 @@ define <8 x i16> @popcnt1_16vec(<8 x i16> %uWord) {
%and = and <8 x i16> %uWord, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
%shr = lshr <8 x i16> %uWord, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%and1 = and <8 x i16> %shr, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
- %add = add nuw <8 x i16> %and1, %and
+ %add = add <8 x i16> %and1, %and
%and2 = and <8 x i16> %add, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
%shr3 = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%and4 = and <8 x i16> %shr3, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
- %add5 = add nuw nsw <8 x i16> %and4, %and2
+ %add5 = add <8 x i16> %and4, %and2
%and6 = and <8 x i16> %add5, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
%shr7 = lshr <8 x i16> %add5, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%and8 = and <8 x i16> %shr7, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
- %add9 = add nuw nsw <8 x i16> %and8, %and6
+ %add9 = add <8 x i16> %and8, %and6
%and10 = and <8 x i16> %add9, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%shr11 = lshr <8 x i16> %add9, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %add13 = add nuw nsw <8 x i16> %shr11, %and10
+ %add13 = add <8 x i16> %shr11, %and10
ret <8 x i16> %add13
}
@@ -420,22 +417,22 @@ define <4 x i32> @popcnt1_32vec(<4 x i32> %uWord) {
%and = and <4 x i32> %uWord, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
%shr = lshr <4 x i32> %uWord, <i32 1, i32 1, i32 1, i32 1>
%and1 = and <4 x i32> %shr, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
- %add = add nuw <4 x i32> %and1, %and
+ %add = add <4 x i32> %and1, %and
%and2 = and <4 x i32> %add, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
%shr3 = lshr <4 x i32> %add, <i32 2, i32 2, i32 2, i32 2>
%and4 = and <4 x i32> %shr3, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
- %add5 = add nuw nsw <4 x i32> %and4, %and2
+ %add5 = add <4 x i32> %and4, %and2
%and6 = and <4 x i32> %add5, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
%shr7 = lshr <4 x i32> %add5, <i32 4, i32 4, i32 4, i32 4>
%and8 = and <4 x i32> %shr7, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
- %add9 = add nuw nsw <4 x i32> %and8, %and6
+ %add9 = add <4 x i32> %and8, %and6
%and10 = and <4 x i32> %add9, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
%shr11 = lshr <4 x i32> %add9, <i32 8, i32 8, i32 8, i32 8>
%and12 = and <4 x i32> %shr11, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
- %add13 = add nuw nsw <4 x i32> %and12, %and10
+ %add13 = add <4 x i32> %and12, %and10
%and14 = and <4 x i32> %add13, <i32 65535, i32 65535, i32 65535, i32 65535>
%shr15 = lshr <4 x i32> %add13, <i32 16, i32 16, i32 16, i32 16>
- %add16 = add nuw nsw <4 x i32> %and14, %shr15
+ %add16 = add <4 x i32> %and14, %shr15
ret <4 x i32> %add16
}
@@ -448,27 +445,27 @@ define i64 @popcnt1_64_variant2(i64 noundef %uWord) {
%and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
%shr = lshr i64 %uWord, 1
%and1 = and i64 %shr, 6148914691236517205
- %add = add nuw i64 %and1, %and
+ %add = add i64 %and1, %and
%and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
%shr3 = lshr i64 %add, 2
%and4 = and i64 %shr3, 3689348814741910323
- %add5 = add nuw nsw i64 %and4, %and2
+ %add5 = add i64 %and4, %and2
%and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
%shr7 = lshr i64 %add5, 4
%and8 = and i64 %shr7, 1085102592571150095
- %add9 = add nuw nsw i64 %and8, %and6
+ %add9 = add i64 %and8, %and6
%and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
%shr11 = lshr i64 %add9, 8
%and12 = and i64 %shr11, 71777214294589695
- %add13 = add nuw nsw i64 %and12, %and10
+ %add13 = add i64 %and12, %and10
%and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
%shr15 = lshr i64 %add13, 16
%and16 = and i64 %shr15, 281470681808895
- %add17 = add nuw nsw i64 %and16, %and14
+ %add17 = add i64 %and16, %and14
%and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
%shr19 = lshr i64 %add17, 32
%and20 = and i64 %shr19, 4294967295
- %add21 = add nuw nsw i64 %and18, %and20
+ %add21 = add i64 %and18, %and20
ret i64 %add21
}
@@ -481,26 +478,26 @@ define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
%and = and <2 x i64> %uWord, <i64 6148914691236517205, i64 6148914691236517205>
%shr = lshr <2 x i64> %uWord, <i64 1, i64 1>
%and1 = and <2 x i64> %shr, <i64 6148914691236517205, i64 6148914691236517205>
- %add = add nuw <2 x i64> %and1, %and
+ %add = add <2 x i64> %and1, %and
%and2 = and <2 x i64> %add, <i64 3689348814741910323, i64 3689348814741910323>
%shr3 = lshr <2 x i64> %add, <i64 2, i64 2>
%and4 = and <2 x i64> %shr3, <i64 3689348814741910323, i64 3689348814741910323>
- %add5 = add nuw nsw <2 x i64> %and4, %and2
+ %add5 = add <2 x i64> %and4, %and2
%and6 = and <2 x i64> %add5, <i64 1085102592571150095, i64 1085102592571150095>
%shr7 = lshr <2 x i64> %add5, <i64 4, i64 4>
%and8 = and <2 x i64> %shr7, <i64 1085102592571150095, i64 1085102592571150095>
- %add9 = add nuw nsw <2 x i64> %and8, %and6
+ %add9 = add <2 x i64> %and8, %and6
%and10 = and <2 x i64> %add9, <i64 71777214294589695, i64 71777214294589695>
%shr11 = lshr <2 x i64> %add9, <i64 8, i64 8>
%and12 = and <2 x i64> %shr11, <i64 71777214294589695, i64 71777214294589695>
- %add13 = add nuw nsw <2 x i64> %and12, %and10
+ %add13 = add <2 x i64> %and12, %and10
%and14 = and <2 x i64> %add13, <i64 281470681808895, i64 281470681808895>
%shr15 = lshr <2 x i64> %add13, <i64 16, i64 16>
%and16 = and <2 x i64> %shr15, <i64 281470681808895, i64 281470681808895>
- %add17 = add nuw nsw <2 x i64> %and16, %and14
+ %add17 = add <2 x i64> %and16, %and14
%and18 = and <2 x i64> %add17, <i64 4294967295, i64 4294967295>
%shr19 = lshr <2 x i64> %add17, <i64 32, i64 32>
- %add20 = add nuw nsw <2 x i64> %and18, %shr19
+ %add20 = add <2 x i64> %and18, %shr19
ret <2 x i64> %add20
}
@@ -512,29 +509,29 @@ define i8 @popcnt1_8_negative(i8 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i8 [[UWORD:%.*]], 85
; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i8 [[SHR]], 85
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i8 [[ADD]], 51
; CHECK-NEXT: [[SHR3:%.*]] = lshr i8 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i8 [[SHR3]], 51
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i8 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i8 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i8 [[ADD5]], 15
; CHECK-NEXT: [[SHR7:%.*]] = lshr i8 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i8 [[SHR7]], 15
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i8 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i8 [[AND8]], [[AND6]]
; CHECK-NEXT: ret i8 [[ADD9]]
;
%and = and i8 %uWord, 85 ; 0x55
%shr = lshr i8 %uWord, 1
%and1 = and i8 %shr, 85
- %add = add nuw i8 %and1, %and
+ %add = add i8 %and1, %and
%and2 = and i8 %add, 51 ; 0x33
%shr3 = lshr i8 %add, 2
%and4 = and i8 %shr3, 51
- %add5 = add nuw nsw i8 %and4, %and2
+ %add5 = add i8 %and4, %and2
%and6 = and i8 %add5, 15 ; 0x0F
%shr7 = lshr i8 %add5, 4
%and8 = and i8 %shr7, 15
- %add9 = add nuw nsw i8 %and8, %and6
+ %add9 = add i8 %and8, %and6
ret i8 %add9
}
@@ -544,59 +541,59 @@ define i128 @popcnt1_128_negative(i128 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i128 [[UWORD:%.*]], 113427455640312821154458202477256070485
; CHECK-NEXT: [[SHR:%.*]] = lshr i128 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i128 [[SHR]], 113427455640312821154458202477256070485
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i128 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i128 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i128 [[ADD]], 68056473384187692692674921486353642291
; CHECK-NEXT: [[SHR3:%.*]] = lshr i128 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i128 [[SHR3]], 68056473384187692692674921486353642291
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i128 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i128 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i128 [[ADD5]], 20016609818878733144904388672456953615
; CHECK-NEXT: [[SHR7:%.*]] = lshr i128 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i128 [[SHR7]], 20016609818878733144904388672456953615
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i128 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i128 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i128 [[ADD9]], 1334440654591915542993625911497130241
; CHECK-NEXT: [[SHR11:%.*]] = lshr i128 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i128 [[SHR11]], 1334440654591915542993625911497130241
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i128 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i128 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i128 [[ADD13]], 5192296858534827628530496329220095
; CHECK-NEXT: [[SHR15:%.*]] = lshr i128 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i128 [[SHR15]], 5192296858534827628530496329220095
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i128 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i128 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i128 [[ADD17]], 79228162514264337593543950335
; CHECK-NEXT: [[SHR19:%.*]] = lshr i128 [[ADD17]], 32
; CHECK-NEXT: [[AND20:%.*]] = and i128 [[SHR19]], 79228162514264337593543950335
-; CHECK-NEXT: [[ADD21:%.*]] = add nuw nsw i128 [[AND18]], [[AND20]]
+; CHECK-NEXT: [[ADD21:%.*]] = add i128 [[AND18]], [[AND20]]
; CHECK-NEXT: [[AND22:%.*]] = and i128 [[ADD21]], -1
; CHECK-NEXT: [[SHR23:%.*]] = lshr i128 [[ADD21]], 64
-; CHECK-NEXT: [[ADD24:%.*]] = add nuw nsw i128 [[AND22]], [[SHR23]]
+; CHECK-NEXT: [[ADD24:%.*]] = add i128 [[AND22]], [[SHR23]]
; CHECK-NEXT: ret i128 [[ADD24]]
;
%and = and i128 %uWord, 113427455640312821154458202477256070485 ; 0x55555555555555555555555555555555
%shr = lshr i128 %uWord, 1
%and1 = and i128 %shr, 113427455640312821154458202477256070485
- %add = add nuw i128 %and1, %and
+ %add = add i128 %and1, %and
%and2 = and i128 %add, 68056473384187692692674921486353642291 ; 0x33333333333333333333333333333333
%shr3 = lshr i128 %add, 2
%and4 = and i128 %shr3, 68056473384187692692674921486353642291
- %add5 = add nuw nsw i128 %and4, %and2
+ %add5 = add i128 %and4, %and2
%and6 = and i128 %add5, 20016609818878733144904388672456953615 ; 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
%shr7 = lshr i128 %add5, 4
%and8 = and i128 %shr7, 20016609818878733144904388672456953615
- %add9 = add nuw nsw i128 %and8, %and6
+ %add9 = add i128 %and8, %and6
%and10 = and i128 %add9, 1334440654591915542993625911497130241 ; 0x01010101010101010101010101010101 (not the 0x00FF00FF... splat)
%shr11 = lshr i128 %add9, 8
%and12 = and i128 %shr11, 1334440654591915542993625911497130241
- %add13 = add nuw nsw i128 %and12, %and10
+ %add13 = add i128 %and12, %and10
%and14 = and i128 %add13, 5192296858534827628530496329220095 ; 0x0000FFFFFFFFFFFFFFFFFFFFFFFFFFFF (2^112 - 1, not the 0x0000FFFF... splat)
%shr15 = lshr i128 %add13, 16
%and16 = and i128 %shr15, 5192296858534827628530496329220095
- %add17 = add nuw nsw i128 %and16, %and14
+ %add17 = add i128 %and16, %and14
%and18 = and i128 %add17, 79228162514264337593543950335 ; 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF (2^96 - 1, not the 0x00000000FFFFFFFF... splat)
%shr19 = lshr i128 %add17, 32
%and20 = and i128 %shr19, 79228162514264337593543950335
- %add21 = add nuw nsw i128 %and18, %and20
+ %add21 = add i128 %and18, %and20
%and22 = and i128 %add21, 340282366920938463463374607431768211455 ; 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF (all ones, not the low-64-bit mask)
%shr23 = lshr i128 %add21, 64
- %add24 = add nuw nsw i128 %and22, %shr23
+ %add24 = add i128 %and22, %shr23
ret i128 %add24
}
@@ -606,18 +603,18 @@ define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
; CHECK-NEXT: [[AND2:%.*]] = and i32 [[UWORD:%.*]], 858993459
; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[UWORD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i32 [[ADD5]], 252645135
; CHECK-NEXT: [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i32 [[SHR7]], 252645135
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i32 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD9]], 16711935
; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
; CHECK-NEXT: ret i32 [[ADD16]]
;
; Missing: %and = and i32 %uWord, 1431655765
@@ -628,18 +625,18 @@ define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
%and2 = and i32 %uWord, 858993459
%shr3 = lshr i32 %uWord, 2
%and4 = and i32 %shr3, 858993459
- %add5 = add nuw nsw i32 %and4, %and2
+ %add5 = add i32 %and4, %and2
%and6 = and i32 %add5, 252645135
%shr7 = lshr i32 %add5, 4
%and8 = and i32 %shr7, 252645135
- %add9 = add nuw nsw i32 %and8, %and6
+ %add9 = add i32 %and8, %and6
%and10 = and i32 %add9, 16711935
%shr11 = lshr i32 %add9, 8
%and12 = and i32 %shr11, 16711935
- %add13 = add nuw nsw i32 %and12, %and10
+ %add13 = add i32 %and12, %and10
%and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
- %add16 = add nuw nsw i32 %and14, %shr15
+ %add16 = add i32 %and14, %shr15
ret i32 %add16
}
@@ -649,28 +646,28 @@ define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i32 [[UWORD:%.*]], 1431655765
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 1431655765
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ADD]], 858993459
; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD5]], 16711935
; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD5]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
; CHECK-NEXT: ret i32 [[ADD16]]
;
%and = and i32 %uWord, 1431655765
%shr = lshr i32 %uWord, 1
%and1 = and i32 %shr, 1431655765
- %add = add nuw i32 %and1, %and
+ %add = add i32 %and1, %and
%and2 = and i32 %add, 858993459
%shr3 = lshr i32 %add, 2
%and4 = and i32 %shr3, 858993459
- %add5 = add nuw nsw i32 %and4, %and2
+ %add5 = add i32 %and4, %and2
; Missing: %and6 = and i32 %add5, 252645135
; Missing: %shr7 = lshr i32 %add5, 4
; Missing: %and8 = and i32 %shr7, 252645135
@@ -679,10 +676,10 @@ define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
%and10 = and i32 %add5, 16711935
%shr11 = lshr i32 %add5, 8
%and12 = and i32 %shr11, 16711935
- %add13 = add nuw nsw i32 %and12, %and10
+ %add13 = add i32 %and12, %and10
%and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
- %add16 = add nuw nsw i32 %and14, %shr15
+ %add16 = add i32 %and14, %shr15
ret i32 %add16
}
@@ -692,43 +689,43 @@ define i32 @popcnt1_32_wrong_mask(i32 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i32 [[UWORD:%.*]], 1145324612
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 1145324612
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ADD]], 858993459
; CHECK-NEXT: [[SHR3:%.*]] = lshr i32 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i32 [[ADD5]], 252645135
; CHECK-NEXT: [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i32 [[SHR7]], 252645135
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i32 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i32 [[ADD9]], 16711935
; CHECK-NEXT: [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i32 [[ADD13]], 65535
; CHECK-NEXT: [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT: [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
; CHECK-NEXT: ret i32 [[ADD16]]
;
%and = and i32 %uWord, 1145324612 ; 0x44444444 instead of 0x55555555
%shr = lshr i32 %uWord, 1
%and1 = and i32 %shr, 1145324612
- %add = add nuw i32 %and1, %and
+ %add = add i32 %and1, %and
%and2 = and i32 %add, 858993459
%shr3 = lshr i32 %add, 2
%and4 = and i32 %shr3, 858993459
- %add5 = add nuw nsw i32 %and4, %and2
+ %add5 = add i32 %and4, %and2
%and6 = and i32 %add5, 252645135
%shr7 = lshr i32 %add5, 4
%and8 = and i32 %shr7, 252645135
- %add9 = add nuw nsw i32 %and8, %and6
+ %add9 = add i32 %and8, %and6
%and10 = and i32 %add9, 16711935
%shr11 = lshr i32 %add9, 8
%and12 = and i32 %shr11, 16711935
- %add13 = add nuw nsw i32 %and12, %and10
+ %add13 = add i32 %and12, %and10
%and14 = and i32 %add13, 65535
%shr15 = lshr i32 %add13, 16
- %add16 = add nuw nsw i32 %and14, %shr15
+ %add16 = add i32 %and14, %shr15
ret i32 %add16
}
@@ -738,22 +735,22 @@ define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[UWORD:%.*]], 3689348814741910323
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[UWORD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
; CHECK-NEXT: [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
; CHECK-NEXT: ret i64 [[ADD20]]
;
; Missing: %and = and i64 %uWord, 6148914691236517205
@@ -764,22 +761,22 @@ define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
%and2 = and i64 %uWord, 3689348814741910323
%shr3 = lshr i64 %uWord, 2
%and4 = and i64 %shr3, 3689348814741910323
- %add5 = add nuw nsw i64 %and4, %and2
+ %add5 = add i64 %and4, %and2
%and6 = and i64 %add5, 1085102592571150095
%shr7 = lshr i64 %add5, 4
%and8 = and i64 %shr7, 1085102592571150095
- %add9 = add nuw nsw i64 %and8, %and6
+ %add9 = add i64 %and8, %and6
%and10 = and i64 %add9, 71777214294589695
%shr11 = lshr i64 %add9, 8
%and12 = and i64 %shr11, 71777214294589695
- %add13 = add nuw nsw i64 %and12, %and10
+ %add13 = add i64 %and12, %and10
%and14 = and i64 %add13, 281470681808895
%shr15 = lshr i64 %add13, 16
%and16 = and i64 %shr15, 281470681808895
- %add17 = add nuw nsw i64 %and16, %and14
+ %add17 = add i64 %and16, %and14
%and18 = and i64 %add17, 4294967295
%shr19 = lshr i64 %add17, 32
- %add20 = add nuw nsw i64 %and18, %shr19
+ %add20 = add i64 %and18, %shr19
ret i64 %add20
}
@@ -789,32 +786,32 @@ define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i64 [[UWORD:%.*]], 6148914691236517205
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 6148914691236517205
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD5]], 71777214294589695
; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD5]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
; CHECK-NEXT: ret i64 [[ADD20]]
;
%and = and i64 %uWord, 6148914691236517205
%shr = lshr i64 %uWord, 1
%and1 = and i64 %shr, 6148914691236517205
- %add = add nuw i64 %and1, %and
+ %add = add i64 %and1, %and
%and2 = and i64 %add, 3689348814741910323
%shr3 = lshr i64 %add, 2
%and4 = and i64 %shr3, 3689348814741910323
- %add5 = add nuw nsw i64 %and4, %and2
+ %add5 = add i64 %and4, %and2
; Missing: %and6 = and i64 %add5, 1085102592571150095
; Missing: %shr7 = lshr i64 %add5, 4
; Missing: %and8 = and i64 %shr7, 1085102592571150095
@@ -823,14 +820,14 @@ define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
%and10 = and i64 %add5, 71777214294589695
%shr11 = lshr i64 %add5, 8
%and12 = and i64 %shr11, 71777214294589695
- %add13 = add nuw nsw i64 %and12, %and10
+ %add13 = add i64 %and12, %and10
%and14 = and i64 %add13, 281470681808895
%shr15 = lshr i64 %add13, 16
%and16 = and i64 %shr15, 281470681808895
- %add17 = add nuw nsw i64 %and16, %and14
+ %add17 = add i64 %and16, %and14
%and18 = and i64 %add17, 4294967295
%shr19 = lshr i64 %add17, 32
- %add20 = add nuw nsw i64 %and18, %shr19
+ %add20 = add i64 %and18, %shr19
ret i64 %add20
}
@@ -840,51 +837,51 @@ define i64 @popcnt1_64_wrong_mask(i64 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i64 [[UWORD:%.*]], 4919131752989213764
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 4919131752989213764
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
; CHECK-NEXT: [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
; CHECK-NEXT: [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
; CHECK-NEXT: [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
; CHECK-NEXT: ret i64 [[ADD20]]
;
%and = and i64 %uWord, 4919131752989213764 ; 0x4444444444444444 instead of 0x5555555555555555
%shr = lshr i64 %uWord, 1
%and1 = and i64 %shr, 4919131752989213764
- %add = add nuw i64 %and1, %and
+ %add = add i64 %and1, %and
%and2 = and i64 %add, 3689348814741910323
%shr3 = lshr i64 %add, 2
%and4 = and i64 %shr3, 3689348814741910323
- %add5 = add nuw nsw i64 %and4, %and2
+ %add5 = add i64 %and4, %and2
%and6 = and i64 %add5, 1085102592571150095
%shr7 = lshr i64 %add5, 4
%and8 = and i64 %shr7, 1085102592571150095
- %add9 = add nuw nsw i64 %and8, %and6
+ %add9 = add i64 %and8, %and6
%and10 = and i64 %add9, 71777214294589695
%shr11 = lshr i64 %add9, 8
%and12 = and i64 %shr11, 71777214294589695
- %add13 = add nuw nsw i64 %and12, %and10
+ %add13 = add i64 %and12, %and10
%and14 = and i64 %add13, 281470681808895
%shr15 = lshr i64 %add13, 16
%and16 = and i64 %shr15, 281470681808895
- %add17 = add nuw nsw i64 %and16, %and14
+ %add17 = add i64 %and16, %and14
%and18 = and i64 %add17, 4294967295
%shr19 = lshr i64 %add17, 32
- %add20 = add nuw nsw i64 %and18, %shr19
+ %add20 = add i64 %and18, %shr19
ret i64 %add20
}
@@ -894,14 +891,14 @@ define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
; CHECK-NEXT: [[AND2:%.*]] = and i16 [[UWORD:%.*]], 13107
; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[UWORD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i16 [[ADD5]], 3855
; CHECK-NEXT: [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i16 [[SHR7]], 3855
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i16 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD9]], 255
; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
; CHECK-NEXT: ret i16 [[ADD13]]
;
; Missing: %and = and i16 %uWord, 21845
@@ -912,14 +909,14 @@ define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
%and2 = and i16 %uWord, 13107
%shr3 = lshr i16 %uWord, 2
%and4 = and i16 %shr3, 13107
- %add5 = add nuw nsw i16 %and4, %and2
+ %add5 = add i16 %and4, %and2
%and6 = and i16 %add5, 3855
%shr7 = lshr i16 %add5, 4
%and8 = and i16 %shr7, 3855
- %add9 = add nuw nsw i16 %and8, %and6
+ %add9 = add i16 %and8, %and6
%and10 = and i16 %add9, 255
%shr11 = lshr i16 %add9, 8
- %add13 = add nuw nsw i16 %shr11, %and10
+ %add13 = add i16 %shr11, %and10
ret i16 %add13
}
@@ -929,24 +926,24 @@ define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i16 [[UWORD:%.*]], 21845
; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i16 [[SHR]], 21845
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i16 [[ADD]], 13107
; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD5]], 255
; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD5]], 8
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
; CHECK-NEXT: ret i16 [[ADD13]]
;
%and = and i16 %uWord, 21845
%shr = lshr i16 %uWord, 1
%and1 = and i16 %shr, 21845
- %add = add nuw i16 %and1, %and
+ %add = add i16 %and1, %and
%and2 = and i16 %add, 13107
%shr3 = lshr i16 %add, 2
%and4 = and i16 %shr3, 13107
- %add5 = add nuw nsw i16 %and4, %and2
+ %add5 = add i16 %and4, %and2
; Missing: %and6 = and i16 %add5, 3855
; Missing: %shr7 = lshr i16 %add5, 4
; Missing: %and8 = and i16 %shr7, 3855
@@ -954,7 +951,7 @@ define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
; Jumping to step 4 (0x00FF mask)
%and10 = and i16 %add5, 255
%shr11 = lshr i16 %add5, 8
- %add13 = add nuw nsw i16 %shr11, %and10
+ %add13 = add i16 %shr11, %and10
ret i16 %add13
}
@@ -964,35 +961,35 @@ define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i16 [[UWORD:%.*]], 17476
; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i16 [[SHR]], 17476
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i16 [[ADD]], 13107
; CHECK-NEXT: [[SHR3:%.*]] = lshr i16 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i16 [[ADD5]], 3855
; CHECK-NEXT: [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i16 [[SHR7]], 3855
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i16 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i16 [[ADD9]], 255
; CHECK-NEXT: [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
; CHECK-NEXT: ret i16 [[ADD13]]
;
%and = and i16 %uWord, 17476 ; 0x4444 instead of 0x5555
%shr = lshr i16 %uWord, 1
%and1 = and i16 %shr, 17476
- %add = add nuw i16 %and1, %and
+ %add = add i16 %and1, %and
%and2 = and i16 %add, 13107
%shr3 = lshr i16 %add, 2
%and4 = and i16 %shr3, 13107
- %add5 = add nuw nsw i16 %and4, %and2
+ %add5 = add i16 %and4, %and2
%and6 = and i16 %add5, 3855
%shr7 = lshr i16 %add5, 4
%and8 = and i16 %shr7, 3855
- %add9 = add nuw nsw i16 %and8, %and6
+ %add9 = add i16 %and8, %and6
%and10 = and i16 %add9, 255
%shr11 = lshr i16 %add9, 8
- %add13 = add nuw nsw i16 %shr11, %and10
+ %add13 = add i16 %shr11, %and10
ret i16 %add13
}
@@ -1002,43 +999,43 @@ define i24 @popcnt1_negative_i24(i24 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i24 [[UWORD:%.*]], 5592405
; CHECK-NEXT: [[SHR:%.*]] = lshr i24 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i24 [[SHR]], 5592405
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i24 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i24 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i24 [[ADD]], 3355443
; CHECK-NEXT: [[SHR3:%.*]] = lshr i24 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i24 [[SHR3]], 3355443
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i24 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i24 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i24 [[ADD5]], 986895
; CHECK-NEXT: [[SHR7:%.*]] = lshr i24 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i24 [[SHR7]], 986895
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i24 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i24 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i24 [[ADD9]], 65535
; CHECK-NEXT: [[SHR11:%.*]] = lshr i24 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i24 [[SHR11]], 65535
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i24 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i24 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i24 [[ADD13]], 255
; CHECK-NEXT: [[SHR15:%.*]] = lshr i24 [[ADD13]], 16
-; CHECK-NEXT: [[ADD16:%.*]] = add nuw nsw i24 [[SHR15]], [[AND14]]
+; CHECK-NEXT: [[ADD16:%.*]] = add i24 [[SHR15]], [[AND14]]
; CHECK-NEXT: ret i24 [[ADD16]]
;
%and = and i24 %uWord, 5592405 ; 0x555555
%shr = lshr i24 %uWord, 1
%and1 = and i24 %shr, 5592405
- %add = add nuw i24 %and1, %and
+ %add = add i24 %and1, %and
%and2 = and i24 %add, 3355443 ; 0x333333
%shr3 = lshr i24 %add, 2
%and4 = and i24 %shr3, 3355443
- %add5 = add nuw nsw i24 %and4, %and2
+ %add5 = add i24 %and4, %and2
%and6 = and i24 %add5, 986895 ; 0x0F0F0F
%shr7 = lshr i24 %add5, 4
%and8 = and i24 %shr7, 986895
- %add9 = add nuw nsw i24 %and8, %and6
+ %add9 = add i24 %and8, %and6
%and10 = and i24 %add9, 65535 ; 0x00FFFF
%shr11 = lshr i24 %add9, 8
%and12 = and i24 %shr11, 65535
- %add13 = add nuw nsw i24 %and12, %and10
+ %add13 = add i24 %and12, %and10
%and14 = and i24 %add13, 255 ; 0x0000FF
%shr15 = lshr i24 %add13, 16
- %add16 = add nuw nsw i24 %shr15, %and14
+ %add16 = add i24 %shr15, %and14
ret i24 %add16
}
@@ -1048,51 +1045,51 @@ define i40 @popcnt1_negative_i40(i40 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i40 [[UWORD:%.*]], 366503875925
; CHECK-NEXT: [[SHR:%.*]] = lshr i40 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i40 [[SHR]], 366503875925
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i40 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i40 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i40 [[ADD]], 219902325555
; CHECK-NEXT: [[SHR3:%.*]] = lshr i40 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i40 [[SHR3]], 219902325555
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i40 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i40 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i40 [[ADD5]], 67818775935
; CHECK-NEXT: [[SHR7:%.*]] = lshr i40 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i40 [[SHR7]], 67818775935
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i40 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i40 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i40 [[ADD9]], 280371153695
; CHECK-NEXT: [[SHR11:%.*]] = lshr i40 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i40 [[SHR11]], 280371153695
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i40 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i40 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i40 [[ADD13]], -1
; CHECK-NEXT: [[SHR15:%.*]] = lshr i40 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i40 [[SHR15]], -1
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i40 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i40 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i40 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i40 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i40 [[SHR19]], [[AND18]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i40 [[SHR19]], [[AND18]]
; CHECK-NEXT: ret i40 [[ADD20]]
;
%and = and i40 %uWord, 366503875925 ; 0x5555555555
%shr = lshr i40 %uWord, 1
%and1 = and i40 %shr, 366503875925
- %add = add nuw i40 %and1, %and
+ %add = add i40 %and1, %and
%and2 = and i40 %add, 219902325555 ; 0x3333333333
%shr3 = lshr i40 %add, 2
%and4 = and i40 %shr3, 219902325555
- %add5 = add nuw nsw i40 %and4, %and2
+ %add5 = add i40 %and4, %and2
%and6 = and i40 %add5, 67818775935 ; 0x0FCA50657F (not the 0x0F0F0F0F0F splat, which is 64677154575)
%shr7 = lshr i40 %add5, 4
%and8 = and i40 %shr7, 67818775935
- %add9 = add nuw nsw i40 %and8, %and6
+ %add9 = add i40 %and8, %and6
%and10 = and i40 %add9, 280371153695 ; 0x41476C4B1F (not a 0x00FF00FF splat for 40-bit)
%shr11 = lshr i40 %add9, 8
%and12 = and i40 %shr11, 280371153695
- %add13 = add nuw nsw i40 %and12, %and10
+ %add13 = add i40 %and12, %and10
%and14 = and i40 %add13, 1099511627775 ; 0xFFFFFFFFFF (all ones for 40-bit, not a 0x0000FFFF splat)
%shr15 = lshr i40 %add13, 16
%and16 = and i40 %shr15, 1099511627775
- %add17 = add nuw nsw i40 %and16, %and14
+ %add17 = add i40 %and16, %and14
%and18 = and i40 %add17, 4294967295 ; 0x00000000FFFFFFFF
%shr19 = lshr i40 %add17, 32
- %add20 = add nuw nsw i40 %shr19, %and18
+ %add20 = add i40 %shr19, %and18
ret i40 %add20
}
@@ -1102,51 +1099,51 @@ define i48 @popcnt1_negative_i48(i48 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i48 [[UWORD:%.*]], 93824992236885
; CHECK-NEXT: [[SHR:%.*]] = lshr i48 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i48 [[SHR]], 93824992236885
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i48 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i48 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i48 [[ADD]], 56294995342131
; CHECK-NEXT: [[SHR3:%.*]] = lshr i48 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i48 [[SHR3]], 56294995342131
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i48 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i48 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i48 [[ADD5]], 17361641481615
; CHECK-NEXT: [[SHR7:%.*]] = lshr i48 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i48 [[SHR7]], 17361641481615
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i48 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i48 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i48 [[ADD9]], 71777214294895
; CHECK-NEXT: [[SHR11:%.*]] = lshr i48 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i48 [[SHR11]], 71777214294895
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i48 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i48 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i48 [[ADD13]], -4294967041
; CHECK-NEXT: [[SHR15:%.*]] = lshr i48 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i48 [[SHR15]], -4294967041
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i48 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i48 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i48 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i48 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i48 [[SHR19]], [[AND18]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i48 [[SHR19]], [[AND18]]
; CHECK-NEXT: ret i48 [[ADD20]]
;
%and = and i48 %uWord, 93824992236885 ; 0x555555555555
%shr = lshr i48 %uWord, 1
%and1 = and i48 %shr, 93824992236885
- %add = add nuw i48 %and1, %and
+ %add = add i48 %and1, %and
%and2 = and i48 %add, 56294995342131 ; 0x333333333333
%shr3 = lshr i48 %add, 2
%and4 = and i48 %shr3, 56294995342131
- %add5 = add nuw nsw i48 %and4, %and2
+ %add5 = add i48 %and4, %and2
%and6 = and i48 %add5, 17361641481615 ; 0x0F0F0F0F0F0F
%shr7 = lshr i48 %add5, 4
%and8 = and i48 %shr7, 17361641481615
- %add9 = add nuw nsw i48 %and8, %and6
+ %add9 = add i48 %and8, %and6
%and10 = and i48 %add9, 71777214294895 ; 0x00FF00FF00FF
%shr11 = lshr i48 %add9, 8
%and12 = and i48 %shr11, 71777214294895
- %add13 = add nuw nsw i48 %and12, %and10
+ %add13 = add i48 %and12, %and10
%and14 = and i48 %add13, 281470681743615 ; 0x0000FFFF0000FFFF
%shr15 = lshr i48 %add13, 16
%and16 = and i48 %shr15, 281470681743615
- %add17 = add nuw nsw i48 %and16, %and14
+ %add17 = add i48 %and16, %and14
%and18 = and i48 %add17, 4294967295 ; 0x00000000FFFFFFFF
%shr19 = lshr i48 %add17, 32
- %add20 = add nuw nsw i48 %shr19, %and18
+ %add20 = add i48 %shr19, %and18
ret i48 %add20
}
@@ -1156,50 +1153,50 @@ define i56 @popcnt1_negative_i56(i56 noundef %uWord) {
; CHECK-NEXT: [[AND:%.*]] = and i56 [[UWORD:%.*]], 24019198012642645
; CHECK-NEXT: [[SHR:%.*]] = lshr i56 [[UWORD]], 1
; CHECK-NEXT: [[AND1:%.*]] = and i56 [[SHR]], 24019198012642645
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i56 [[AND1]], [[AND]]
+; CHECK-NEXT: [[ADD:%.*]] = add i56 [[AND1]], [[AND]]
; CHECK-NEXT: [[AND2:%.*]] = and i56 [[ADD]], 14411518807585587
; CHECK-NEXT: [[SHR3:%.*]] = lshr i56 [[ADD]], 2
; CHECK-NEXT: [[AND4:%.*]] = and i56 [[SHR3]], 14411518807585587
-; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i56 [[AND4]], [[AND2]]
+; CHECK-NEXT: [[ADD5:%.*]] = add i56 [[AND4]], [[AND2]]
; CHECK-NEXT: [[AND6:%.*]] = and i56 [[ADD5]], 4444132406286415
; CHECK-NEXT: [[SHR7:%.*]] = lshr i56 [[ADD5]], 4
; CHECK-NEXT: [[AND8:%.*]] = and i56 [[SHR7]], 4444132406286415
-; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i56 [[AND8]], [[AND6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i56 [[AND8]], [[AND6]]
; CHECK-NEXT: [[AND10:%.*]] = and i56 [[ADD9]], 18374686479671535
; CHECK-NEXT: [[SHR11:%.*]] = lshr i56 [[ADD9]], 8
; CHECK-NEXT: [[AND12:%.*]] = and i56 [[SHR11]], 18374686479671535
-; CHECK-NEXT: [[ADD13:%.*]] = add nuw nsw i56 [[AND12]], [[AND10]]
+; CHECK-NEXT: [[ADD13:%.*]] = add i56 [[AND12]], [[AND10]]
; CHECK-NEXT: [[AND14:%.*]] = and i56 [[ADD13]], -4278190081
; CHECK-NEXT: [[SHR15:%.*]] = lshr i56 [[ADD13]], 16
; CHECK-NEXT: [[AND16:%.*]] = and i56 [[SHR15]], -4278190081
-; CHECK-NEXT: [[ADD17:%.*]] = add nuw nsw i56 [[AND16]], [[AND14]]
+; CHECK-NEXT: [[ADD17:%.*]] = add i56 [[AND16]], [[AND14]]
; CHECK-NEXT: [[AND18:%.*]] = and i56 [[ADD17]], 4294967295
; CHECK-NEXT: [[SHR19:%.*]] = lshr i56 [[ADD17]], 32
-; CHECK-NEXT: [[ADD20:%.*]] = add nuw nsw i56 [[SHR19]], [[AND18]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i56 [[SHR19]], [[AND18]]
; CHECK-NEXT: ret i56 [[ADD20]]
;
%and = and i56 %uWord, 24019198012642645 ; 0x55555555555555
%shr = lshr i56 %uWord, 1
%and1 = and i56 %shr, 24019198012642645
- %add = add nuw i56 %and1, %and
+ %add = add i56 %and1, %and
%and2 = and i56 %add, 14411518807585587 ; 0x33333333333333
%shr3 = lshr i56 %add, 2
%and4 = and i56 %shr3, 14411518807585587
- %add5 = add nuw nsw i56 %and4, %and2
+ %add5 = add i56 %and4, %and2
%and6 = and i56 %add5, 4444132406286415 ; 0x0F0F0F0F0F0F0F
%shr7 = lshr i56 %add5, 4
%and8 = and i56 %shr7, 4444132406286415
- %add9 = add nuw nsw i56 %and8, %and6
+ %add9 = add i56 %and8, %and6
%and10 = and i56 %add9, 18374686479671535 ; 0x00FF00FF00FF00FF
%shr11 = lshr i56 %add9, 8
%and12 = and i56 %shr11, 18374686479671535
- %add13 = add nuw nsw i56 %and12, %and10
+ %add13 = add i56 %and12, %and10
%and14 = and i56 %add13, 72057589759737855 ; 0x0000FFFF0000FFFF0000FFFF
%shr15 = lshr i56 %add13, 16
%and16 = and i56 %shr15, 72057589759737855
- %add17 = add nuw nsw i56 %and16, %and14
+ %add17 = add i56 %and16, %and14
%and18 = and i56 %add17, 4294967295 ; 0x00000000FFFFFFFF
%shr19 = lshr i56 %add17, 32
- %add20 = add nuw nsw i56 %shr19, %and18
+ %add20 = add i56 %shr19, %and18
ret i56 %add20
}
More information about the llvm-commits
mailing list