[llvm] [AggressiveInstCombine] POPCNT generation for bit-count pattern (PR #177109)

Rohit Aggarwal via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 00:16:04 PDT 2026


https://github.com/rohitaggarwal007 updated https://github.com/llvm/llvm-project/pull/177109

>From cef5ff889c938ff56464f8064d0a74d4b624b368 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 21 Jan 2026 12:52:41 +0530
Subject: [PATCH 1/9] [AggressiveInstCombine] POPCNT generation for bit-count
 pattern

---
 .../AggressiveInstCombine.cpp                 | 165 ++++++++++++++++++
 .../AggressiveInstCombine/popcount.ll         |  79 +++++++++
 2 files changed, 244 insertions(+)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 3341368208c24..94bf8348b4f19 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -372,6 +372,169 @@ static bool tryToRecognizePopCount(Instruction &I) {
   return false;
 }
 
+// Try to recognize below function as popcount intrinsic.
+// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcount(unsigned int i) {
+// uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
+// uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
+// uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F);
+// uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);
+// return  (uWord & 0x0000FFFF) + (uWord>>16);
+// }
+static bool tryToRecognizePopCount1(Instruction &I) {
+  if (I.getOpcode() != Instruction::Add)
+    return false;
+
+  Type *Ty = I.getType();
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+
+  unsigned Len = Ty->getScalarSizeInBits();
+  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+    return false;
+
+  APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
+  APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  Value *LShrOp0;
+  // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
+  if ((match(Op1, m_LShr(m_Value(LShrOp0), m_SpecificInt(16)))) &&
+      match(Op0, m_And(m_Deferred(LShrOp0), m_SpecificInt(31)))) {
+    Value *ShiftOp0;
+    // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
+    if (match(LShrOp0,
+              m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+                            m_SpecificInt(983055)),
+                      m_And(m_Deferred(ShiftOp0), m_SpecificInt(983055))))) {
+      Value *ShiftOp1;
+      // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
+      if (match(
+              ShiftOp0,
+              m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+                            m_SpecificInt(117901063)),
+                      m_And(m_Deferred(ShiftOp1), m_SpecificInt(117901063))))) {
+        Value *ShiftOp2;
+        // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+        if (match(
+                ShiftOp1,
+                m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
+                              m_SpecificInt(Mask33)),
+                        m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
+          Value *ShiftOp3;
+          // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+          // 0x55555555)".
+          if (match(ShiftOp2,
+                    m_c_Add(
+                        m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
+                              m_SpecificInt(Mask55)),
+                        m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
+            LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+            IRBuilder<> Builder(&I);
+            I.replaceAllUsesWith(Builder.CreateIntrinsic(
+                Intrinsic::ctpop, I.getType(), {ShiftOp3}));
+            ++NumPopCountRecognized;
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+// Try to recognize below function as popcount intrinsic.
+// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcnt(unsigned x) {
+// x = x - ((x >> 1) & 0x55555555);
+// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+// x = (x + (x >> 4)) & 0x0F0F0F0F;
+// x = x + (x >> 8);
+// x = x + (x >> 16);
+// return x & 0x0000003F;
+// }
+
+// int popcnt(unsigned x) {
+// x = x - ((x >> 1) & 0x55555555);
+// x = x - 3*((x >> 2) & 0x33333333);
+// x = (x + (x >> 4)) & 0x0F0F0F0F;
+// x = x + (x >> 8);
+// x = x + (x >> 16);
+// return x & 0x0000003F;
+// }
+
+static bool tryToRecognizePopCount2n3(Instruction &I) {
+  if (I.getOpcode() != Instruction::And)
+    return false;
+
+  Type *Ty = I.getType();
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+
+  unsigned Len = Ty->getScalarSizeInBits();
+  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+    return false;
+
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  Value *LShrOp0;
+  Value *AddOp1;
+  // Matching "x & 0x0000003F".
+  if ((match(Op0, m_Add(m_Value(LShrOp0), m_Value(AddOp1)))) &&
+      match(Op1, m_SpecificInt(63))) {
+    Value *LShr1;
+    Value *And1;
+    // Matching "x = x + (x >> 16)".
+    if (match(LShrOp0, m_LShr(m_Add(m_Value(LShr1), m_Value(And1)),
+                              m_SpecificInt(16)))) {
+      Value *Add2;
+      // Matching " x = x + (x >> 8)".
+      if (match(LShr1, m_LShr(m_Deferred(And1), m_SpecificInt(8))) &&
+          match(And1, m_c_And(m_Value(Add2), m_SpecificInt(252645135)))) {
+        Value *Add3;
+        // Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
+        if (match(Add2, m_c_Add(m_LShr(m_Value(Add3), m_SpecificInt(4)),
+                                m_Deferred(Add3)))) {
+          Value *Sub1;
+          llvm::APInt NegThree(/*BitWidth=*/32, /*Value=*/-3,
+                               /*isSigned=*/true);
+          // x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
+          if (match(Add3,
+                    m_c_Add(
+                        m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+                                m_SpecificInt(858993459)),
+                        m_c_And(m_Deferred(Sub1), m_SpecificInt(858993459)))) ||
+              // Matching "x = x - 3*((x >> 2) & 0x33333333)".
+              match(Add3,
+                    m_Add(m_Mul(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
+                                      m_SpecificInt(858993459)),
+                                m_SpecificInt(NegThree)),
+                          m_Deferred(Sub1)))) {
+            Value *Root;
+            if (match(Sub1,
+                      m_Sub(m_Value(Root),
+                            m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
+                                  m_SpecificInt(1431655765))))) {
+              LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+              IRBuilder<> Builder(&I);
+              I.replaceAllUsesWith(Builder.CreateIntrinsic(
+                  Intrinsic::ctpop, I.getType(), {Root}));
+              ++NumPopCountRecognized;
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 /// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
 /// C2 saturate the value of the fp conversion. The transform is not reversable
 /// as the fptosi.sat is more defined than the input - all values produce a
@@ -1826,6 +1989,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= foldAnyOrAllBitsSet(I);
       MadeChange |= foldGuardedFunnelShift(I, DT);
       MadeChange |= tryToRecognizePopCount(I);
+      MadeChange |= tryToRecognizePopCount1(I);
+      MadeChange |= tryToRecognizePopCount2n3(I);
       MadeChange |= tryToFPToSat(I, TTI);
       MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
       MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f56cab1503531..9b4eea4513de9 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -239,3 +239,82 @@ define i32 @popcount64_mask(i64 %x) {
   %13 = trunc nuw nsw i64 %12 to i32
   ret i32 %13
 }
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef range(i32 0, 59) i32 @popcnt1(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %and = and i32 %uWord, 1431655765
+  %shr = lshr i32 %uWord, 1
+  %and1 = and i32 %shr, 1431655765
+  %add = add nuw i32 %and1, %and
+  %and2 = and i32 %add, 858993459
+  %shr3 = lshr i32 %add, 2
+  %and4 = and i32 %shr3, 858993459
+  %add5 = add nuw nsw i32 %and4, %and2
+  %and6 = and i32 %add5, 117901063
+  %shr7 = lshr i32 %add5, 4
+  %and8 = and i32 %shr7, 117901063
+  %add9 = add nuw nsw i32 %and8, %and6
+  %and10 = and i32 %add9, 983055
+  %shr11 = lshr i32 %add9, 8
+  %and12 = and i32 %shr11, 983055
+  %add13 = add nuw nsw i32 %and12, %and10
+  %and14 = and i32 %add13, 31
+  %shr15 = lshr i32 %add13, 16
+  %add16 = add nuw nsw i32 %and14, %shr15
+  ret i32 %add16
+}
+
+define dso_local noundef range(i32 0, 64) i32 @popcnt2(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt2(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %2 = lshr i32 %0, 1
+  %3 = and i32 %2, 1431655765
+  %4 = sub i32 %0, %3
+  %5 = and i32 %4, 858993459
+  %6 = lshr i32 %4, 2
+  %7 = and i32 %6, 858993459
+  %8 = add nuw nsw i32 %7, %5
+  %9 = lshr i32 %8, 4
+  %10 = add nuw nsw i32 %9, %8
+  %11 = and i32 %10, 252645135
+  %12 = lshr i32 %11, 8
+  %13 = add nuw nsw i32 %12, %11
+  %14 = lshr i32 %13, 16
+  %15 = add nuw nsw i32 %14, %13
+  %16 = and i32 %15, 63
+  ret i32 %16
+}
+
+define dso_local noundef range(i32 0, 64) i32 @popcnt3(i32 noundef %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt3(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[TMP16]]
+;
+  %2 = lshr i32 %0, 1
+  %3 = and i32 %2, 1431655765
+  %4 = sub i32 %0, %3
+  %5 = lshr i32 %4, 2
+  %6 = and i32 %5, 858993459
+  %7 = mul i32 %6, -3
+  %8 = add i32 %7, %4
+  %9 = lshr i32 %8, 4
+  %10 = add i32 %9, %8
+  %11 = and i32 %10, 252645135
+  %12 = lshr i32 %11, 8
+  %13 = add nuw nsw i32 %12, %11
+  %14 = lshr i32 %13, 16
+  %15 = add nuw nsw i32 %14, %13
+  %16 = and i32 %15, 63
+  ret i32 %16
+}

>From 507c16f86438eb7931f36307c806228d83ae8010 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 11 Feb 2026 16:34:51 +0530
Subject: [PATCH 2/9] Split the PR into two PRs

---
 .../AggressiveInstCombine.cpp                 | 90 -------------------
 .../AggressiveInstCombine/popcount.ll         | 47 ----------
 2 files changed, 137 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 94bf8348b4f19..f8804ba432e78 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -446,95 +446,6 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   return false;
 }
 
-// Try to recognize below function as popcount intrinsic.
-// https://doc.lagout.org/security/Hackers%20Delight.pdf
-// Also used in TargetLowering::expandCTPOP().
-//
-// int popcnt(unsigned x) {
-// x = x - ((x >> 1) & 0x55555555);
-// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-// x = (x + (x >> 4)) & 0x0F0F0F0F;
-// x = x + (x >> 8);
-// x = x + (x >> 16);
-// return x & 0x0000003F;
-// }
-
-// int popcnt(unsigned x) {
-// x = x - ((x >> 1) & 0x55555555);
-// x = x - 3*((x >> 2) & 0x33333333);
-// x = (x + (x >> 4)) & 0x0F0F0F0F;
-// x = x + (x >> 8);
-// x = x + (x >> 16);
-// return x & 0x0000003F;
-// }
-
-static bool tryToRecognizePopCount2n3(Instruction &I) {
-  if (I.getOpcode() != Instruction::And)
-    return false;
-
-  Type *Ty = I.getType();
-  if (!Ty->isIntOrIntVectorTy())
-    return false;
-
-  unsigned Len = Ty->getScalarSizeInBits();
-  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
-    return false;
-
-  Value *Op0 = I.getOperand(0);
-  Value *Op1 = I.getOperand(1);
-  Value *LShrOp0;
-  Value *AddOp1;
-  // Matching "x & 0x0000003F".
-  if ((match(Op0, m_Add(m_Value(LShrOp0), m_Value(AddOp1)))) &&
-      match(Op1, m_SpecificInt(63))) {
-    Value *LShr1;
-    Value *And1;
-    // Matching "x = x + (x >> 16)".
-    if (match(LShrOp0, m_LShr(m_Add(m_Value(LShr1), m_Value(And1)),
-                              m_SpecificInt(16)))) {
-      Value *Add2;
-      // Matching " x = x + (x >> 8)".
-      if (match(LShr1, m_LShr(m_Deferred(And1), m_SpecificInt(8))) &&
-          match(And1, m_c_And(m_Value(Add2), m_SpecificInt(252645135)))) {
-        Value *Add3;
-        // Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
-        if (match(Add2, m_c_Add(m_LShr(m_Value(Add3), m_SpecificInt(4)),
-                                m_Deferred(Add3)))) {
-          Value *Sub1;
-          llvm::APInt NegThree(/*BitWidth=*/32, /*Value=*/-3,
-                               /*isSigned=*/true);
-          // x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
-          if (match(Add3,
-                    m_c_Add(
-                        m_c_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
-                                m_SpecificInt(858993459)),
-                        m_c_And(m_Deferred(Sub1), m_SpecificInt(858993459)))) ||
-              // Matching "x = x - 3*((x >> 2) & 0x33333333)".
-              match(Add3,
-                    m_Add(m_Mul(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
-                                      m_SpecificInt(858993459)),
-                                m_SpecificInt(NegThree)),
-                          m_Deferred(Sub1)))) {
-            Value *Root;
-            if (match(Sub1,
-                      m_Sub(m_Value(Root),
-                            m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
-                                  m_SpecificInt(1431655765))))) {
-              LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
-              IRBuilder<> Builder(&I);
-              I.replaceAllUsesWith(Builder.CreateIntrinsic(
-                  Intrinsic::ctpop, I.getType(), {Root}));
-              ++NumPopCountRecognized;
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
 /// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
 /// C2 saturate the value of the fp conversion. The transform is not reversable
 /// as the fptosi.sat is more defined than the input - all values produce a
@@ -1990,7 +1901,6 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= foldGuardedFunnelShift(I, DT);
       MadeChange |= tryToRecognizePopCount(I);
       MadeChange |= tryToRecognizePopCount1(I);
-      MadeChange |= tryToRecognizePopCount2n3(I);
       MadeChange |= tryToFPToSat(I, TTI);
       MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
       MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 9b4eea4513de9..3f5269a41a7f8 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -271,50 +271,3 @@ entry:
   ret i32 %add16
 }
 
-define dso_local noundef range(i32 0, 64) i32 @popcnt2(i32 noundef %0) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt2(
-; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %2 = lshr i32 %0, 1
-  %3 = and i32 %2, 1431655765
-  %4 = sub i32 %0, %3
-  %5 = and i32 %4, 858993459
-  %6 = lshr i32 %4, 2
-  %7 = and i32 %6, 858993459
-  %8 = add nuw nsw i32 %7, %5
-  %9 = lshr i32 %8, 4
-  %10 = add nuw nsw i32 %9, %8
-  %11 = and i32 %10, 252645135
-  %12 = lshr i32 %11, 8
-  %13 = add nuw nsw i32 %12, %11
-  %14 = lshr i32 %13, 16
-  %15 = add nuw nsw i32 %14, %13
-  %16 = and i32 %15, 63
-  ret i32 %16
-}
-
-define dso_local noundef range(i32 0, 64) i32 @popcnt3(i32 noundef %0) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 64) i32 @popcnt3(
-; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr {
-; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0]])
-; CHECK-NEXT:    ret i32 [[TMP16]]
-;
-  %2 = lshr i32 %0, 1
-  %3 = and i32 %2, 1431655765
-  %4 = sub i32 %0, %3
-  %5 = lshr i32 %4, 2
-  %6 = and i32 %5, 858993459
-  %7 = mul i32 %6, -3
-  %8 = add i32 %7, %4
-  %9 = lshr i32 %8, 4
-  %10 = add i32 %9, %8
-  %11 = and i32 %10, 252645135
-  %12 = lshr i32 %11, 8
-  %13 = add nuw nsw i32 %12, %11
-  %14 = lshr i32 %13, 16
-  %15 = add nuw nsw i32 %14, %13
-  %16 = and i32 %15, 63
-  ret i32 %16
-}

>From 669e15134fbd807009adbdc6c95936170ed5301d Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 25 Feb 2026 11:08:45 +0530
Subject: [PATCH 3/9] Update the logic to support generation of i16, i32 and
 i64 popcnt intrinsic.

---
 .../AggressiveInstCombine.cpp                 | 155 ++++++++---
 .../AggressiveInstCombine/popcount.ll         | 246 +++++++++++++++++-
 2 files changed, 351 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index f8804ba432e78..c43f397d8b562 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -376,13 +376,21 @@ static bool tryToRecognizePopCount(Instruction &I) {
 // https://doc.lagout.org/security/Hackers%20Delight.pdf
 // Also used in TargetLowering::expandCTPOP().
 //
-// int popcount(unsigned int i) {
+// int popcount32(unsigned int uWord) {
 // uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
 // uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
 // uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F);
 // uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);
 // return  (uWord & 0x0000FFFF) + (uWord>>16);
 // }
+// int popcount64(unsigned long uWord) {
+// uWord = (uWord & 0x5555555555555555) + ((uWord>>1) & 0x5555555555555555);
+// uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
+// uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
+// uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
+// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// return  (uWord & 0x00000000FFFFFFFF) + ((uWord>>32) & 0x00000000FFFFFFFF);
+// }
 static bool tryToRecognizePopCount1(Instruction &I) {
   if (I.getOpcode() != Instruction::Add)
     return false;
@@ -392,54 +400,115 @@ static bool tryToRecognizePopCount1(Instruction &I) {
     return false;
 
   unsigned Len = Ty->getScalarSizeInBits();
-  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+  if (!(Len <= 64 && Len > 8 && Len % 8 == 0))
     return false;
 
   APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
   APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
-  Value *Op0 = I.getOperand(0);
-  Value *Op1 = I.getOperand(1);
+  APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
+  APInt Mask00FF;
+  if (Len <= 16) {
+    Mask00FF = APInt(16, 0x00FF);
+  } else {
+    Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
+  }
+  APInt Mask0000FFFF;
+  if (Len <= 32) {
+    Mask0000FFFF = APInt(32, 0x0000FFFF);
+  } else {
+    Mask0000FFFF = APInt::getSplat(Len, APInt(32, 0x0000FFFF));
+  }
+
+  APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
+  // Matching "(uWord & 0x00000000FFFFFFFF) + (uWord>>32)".
+  // OR
+  // Matching "(uWord & 0x00000000FFFFFFFF) + ((uWord>>32) &
+  // 0x00000000FFFFFFFF)".
+  Value *ShiftOp;
+  Value *Start = &I;
+  bool is64 = false;
+  if (match(Start,
+            m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
+                          m_SpecificInt(Mask64)),
+                    m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64)))) ||
+      match(Start,
+            m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
+                    m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
+    Start = ShiftOp;
+    is64 = true;
+  }
   Value *LShrOp0;
   // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
-  if ((match(Op1, m_LShr(m_Value(LShrOp0), m_SpecificInt(16)))) &&
-      match(Op0, m_And(m_Deferred(LShrOp0), m_SpecificInt(31)))) {
-    Value *ShiftOp0;
-    // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
-    if (match(LShrOp0,
-              m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
-                            m_SpecificInt(983055)),
-                      m_And(m_Deferred(ShiftOp0), m_SpecificInt(983055))))) {
-      Value *ShiftOp1;
-      // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
-      if (match(
-              ShiftOp0,
-              m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
-                            m_SpecificInt(117901063)),
-                      m_And(m_Deferred(ShiftOp1), m_SpecificInt(117901063))))) {
-        Value *ShiftOp2;
-        // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
-        if (match(
-                ShiftOp1,
-                m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
-                              m_SpecificInt(Mask33)),
-                        m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
-          Value *ShiftOp3;
-          // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
-          // 0x55555555)".
-          if (match(ShiftOp2,
-                    m_c_Add(
-                        m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
-                              m_SpecificInt(Mask55)),
-                        m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
-            LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
-            IRBuilder<> Builder(&I);
-            I.replaceAllUsesWith(Builder.CreateIntrinsic(
-                Intrinsic::ctpop, I.getType(), {ShiftOp3}));
-            ++NumPopCountRecognized;
-            return true;
-          }
-        }
-      }
+  // Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
+  bool test16 = match(
+      Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
+                           m_SpecificInt(Mask0000FFFF)),
+                     m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
+
+  bool is32 = false;
+  if ((is64 && test16) ||
+      (!is64 && Len == 32 &&
+       (test16 ||
+        match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
+                             m_And(m_Deferred(LShrOp0),
+                                   m_SpecificInt(Mask0000FFFF))))))) {
+    Start = LShrOp0;
+
+    is32 = true;
+  }
+  Value *ShiftOp0;
+  // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
+  // OR
+  // Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8);".
+  bool test8 = match(
+      Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+                           m_SpecificInt(Mask00FF)),
+                     m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
+
+  bool is16 = false;
+  if ((is32 && test8) ||
+      (!is32 && Len == 16 &&
+       (test8 ||
+        match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+                             m_And(m_Deferred(ShiftOp0),
+                                   m_SpecificInt(Mask00FF))))))) {
+    Start = ShiftOp0;
+    is16 = true;
+  }
+
+  Value *ShiftOp1;
+  // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
+  bool test4 =
+      match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+                                 m_SpecificInt(Mask0F)),
+                           m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))));
+
+  bool is8 = false;
+  if ((is16 && test4)) {
+    Start = ShiftOp1;
+    is8 = true;
+  }
+
+  Value *ShiftOp2;
+  // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+  if (is8 &&
+      match(Start,
+            m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
+                          m_SpecificInt(Mask33)),
+                    m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
+    Value *ShiftOp3;
+    // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+    // 0x55555555)".
+    if (match(ShiftOp2,
+              m_c_Add(m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
+                            m_SpecificInt(Mask55)),
+                      m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
+      LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+      IRBuilder<> Builder(&I);
+      I.replaceAllUsesWith(
+          Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp3}));
+      ++NumPopCountRecognized;
+      return true;
     }
   }
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 3f5269a41a7f8..f161b47428329 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -241,8 +241,8 @@ define i32 @popcount64_mask(i64 %x) {
 }
 
 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1(
+define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(
 ; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
@@ -257,17 +257,249 @@ entry:
   %shr3 = lshr i32 %add, 2
   %and4 = and i32 %shr3, 858993459
   %add5 = add nuw nsw i32 %and4, %and2
-  %and6 = and i32 %add5, 117901063
+  %and6 = and i32 %add5, 252645135
   %shr7 = lshr i32 %add5, 4
-  %and8 = and i32 %shr7, 117901063
+  %and8 = and i32 %shr7, 252645135
   %add9 = add nuw nsw i32 %and8, %and6
-  %and10 = and i32 %add9, 983055
+  %and10 = and i32 %add9, 16711935
   %shr11 = lshr i32 %add9, 8
-  %and12 = and i32 %shr11, 983055
+  %and12 = and i32 %shr11, 16711935
   %add13 = add nuw nsw i32 %and12, %and10
-  %and14 = and i32 %add13, 31
+  %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
   %add16 = add nuw nsw i32 %and14, %shr15
   ret i32 %add16
 }
 
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(i32 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %and = and i32 %uWord, 1431655765
+  %shr = lshr i32 %uWord, 1
+  %and1 = and i32 %shr, 1431655765
+  %add = add nuw i32 %and1, %and
+  %and2 = and i32 %add, 858993459
+  %shr3 = lshr i32 %add, 2
+  %and4 = and i32 %shr3, 858993459
+  %add5 = add nuw nsw i32 %and4, %and2
+  %and6 = and i32 %add5, 252645135
+  %shr7 = lshr i32 %add5, 4
+  %and8 = and i32 %shr7, 252645135
+  %add9 = add nuw nsw i32 %and8, %and6
+  %and10 = and i32 %add9, 16711935
+  %shr11 = lshr i32 %add9, 8
+  %and12 = and i32 %shr11, 16711935
+  %add13 = add nuw nsw i32 %and12, %and10
+  %and14 = and i32 %add13, 65535
+  %shr15 = lshr i32 %add13, 16
+  %and16 = and i32 %shr15, 65535
+  %add17 = add nuw nsw i32 %and14, %and16
+  ret i32 %add17
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef i64 @popcnt1_64(i64 noundef %uWord) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef i64 @popcnt1_64(
+; CHECK-SAME: i64 noundef [[UWORD:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD]])
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
+  %shr = lshr i64 %uWord, 1
+  %and1 = and i64 %shr, 6148914691236517205
+  %add = add nuw i64 %and1, %and
+  %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+  %shr3 = lshr i64 %add, 2
+  %and4 = and i64 %shr3, 3689348814741910323
+  %add5 = add nuw nsw i64 %and4, %and2
+  %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
+  %shr7 = lshr i64 %add5, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %add9 = add nuw nsw i64 %and8, %and6
+  %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
+  %shr11 = lshr i64 %add9, 8
+  %and12 = and i64 %shr11, 71777214294589695
+  %add13 = add nuw nsw i64 %and12, %and10
+  %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+  %shr15 = lshr i64 %add13, 16
+  %and16 = and i64 %shr15, 281470681808895
+  %add17 = add nuw nsw i64 %and16, %and14
+  %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i64 %add17, 32
+  %add20 = add nuw nsw i64 %and18, %shr19
+  ret i64 %add20
+}
+
+; Test 16-bit popcount pattern (Hacker's Delight)
+define i16 @popcnt1_16(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ctpop.i16(i16 [[UWORD:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %and = and i16 %uWord, 21845 ; 0x5555
+  %shr = lshr i16 %uWord, 1
+  %and1 = and i16 %shr, 21845
+  %add = add nuw i16 %and1, %and
+  %and2 = and i16 %add, 13107 ; 0x3333
+  %shr3 = lshr i16 %add, 2
+  %and4 = and i16 %shr3, 13107
+  %add5 = add nuw nsw i16 %and4, %and2
+  %and6 = and i16 %add5, 3855 ; 0x0F0F
+  %shr7 = lshr i16 %add5, 4
+  %and8 = and i16 %shr7, 3855
+  %add9 = add nuw nsw i16 %and8, %and6
+  %and10 = and i16 %add9, 255 ; 0x00FF
+  %shr11 = lshr i16 %add9, 8
+  %add13 = add nuw nsw i16 %shr11, %and10
+  ret i16 %add13
+}
+
+; Test 16-bit popcount pattern variant2 (with extra mask on final shift)
+define i16 @popcnt1_16_variant2(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_variant2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ctpop.i16(i16 [[UWORD:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %and = and i16 %uWord, 21845 ; 0x5555
+  %shr = lshr i16 %uWord, 1
+  %and1 = and i16 %shr, 21845
+  %add = add nuw i16 %and1, %and
+  %and2 = and i16 %add, 13107 ; 0x3333
+  %shr3 = lshr i16 %add, 2
+  %and4 = and i16 %shr3, 13107
+  %add5 = add nuw nsw i16 %and4, %and2
+  %and6 = and i16 %add5, 3855 ; 0x0F0F
+  %shr7 = lshr i16 %add5, 4
+  %and8 = and i16 %shr7, 3855
+  %add9 = add nuw nsw i16 %and8, %and6
+  %and10 = and i16 %add9, 255 ; 0x00FF
+  %shr11 = lshr i16 %add9, 8
+  %and12 = and i16 %shr11, 255
+  %add13 = add nuw nsw i16 %and10, %and12
+  ret i16 %add13
+}
+
+; Vector variant: 16-bit popcount pattern (Hacker's Delight)
+define <8 x i16> @popcnt1_16vec(<8 x i16> %uWord) {
+; CHECK-LABEL: @popcnt1_16vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[UWORD:%.*]])
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %and = and <8 x i16> %uWord, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
+  %shr = lshr <8 x i16> %uWord, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %and1 = and <8 x i16> %shr, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
+  %add = add nuw <8 x i16> %and1, %and
+  %and2 = and <8 x i16> %add, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
+  %shr3 = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %and4 = and <8 x i16> %shr3, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
+  %add5 = add nuw nsw <8 x i16> %and4, %and2
+  %and6 = and <8 x i16> %add5, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
+  %shr7 = lshr <8 x i16> %add5, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  %and8 = and <8 x i16> %shr7, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
+  %add9 = add nuw nsw <8 x i16> %and8, %and6
+  %and10 = and <8 x i16> %add9, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %shr11 = lshr <8 x i16> %add9, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %add13 = add nuw nsw <8 x i16> %shr11, %and10
+  ret <8 x i16> %add13
+}
+
+; Vector variant: 32-bit popcount pattern (Hacker's Delight)
+define <4 x i32> @popcnt1_32vec(<4 x i32> %uWord) {
+; CHECK-LABEL: @popcnt1_32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[UWORD:%.*]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %and = and <4 x i32> %uWord, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
+  %shr = lshr <4 x i32> %uWord, <i32 1, i32 1, i32 1, i32 1>
+  %and1 = and <4 x i32> %shr, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
+  %add = add nuw <4 x i32> %and1, %and
+  %and2 = and <4 x i32> %add, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
+  %shr3 = lshr <4 x i32> %add, <i32 2, i32 2, i32 2, i32 2>
+  %and4 = and <4 x i32> %shr3, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
+  %add5 = add nuw nsw <4 x i32> %and4, %and2
+  %and6 = and <4 x i32> %add5, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
+  %shr7 = lshr <4 x i32> %add5, <i32 4, i32 4, i32 4, i32 4>
+  %and8 = and <4 x i32> %shr7, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
+  %add9 = add nuw nsw <4 x i32> %and8, %and6
+  %and10 = and <4 x i32> %add9, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
+  %shr11 = lshr <4 x i32> %add9, <i32 8, i32 8, i32 8, i32 8>
+  %and12 = and <4 x i32> %shr11, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
+  %add13 = add nuw nsw <4 x i32> %and12, %and10
+  %and14 = and <4 x i32> %add13, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %shr15 = lshr <4 x i32> %add13, <i32 16, i32 16, i32 16, i32 16>
+  %add16 = add nuw nsw <4 x i32> %and14, %shr15
+  ret <4 x i32> %add16
+}
+
+; Test 64-bit popcount pattern variant2 (with extra mask on final shift)
+define i64 @popcnt1_64_variant2(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_variant2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
+  %shr = lshr i64 %uWord, 1
+  %and1 = and i64 %shr, 6148914691236517205
+  %add = add nuw i64 %and1, %and
+  %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+  %shr3 = lshr i64 %add, 2
+  %and4 = and i64 %shr3, 3689348814741910323
+  %add5 = add nuw nsw i64 %and4, %and2
+  %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
+  %shr7 = lshr i64 %add5, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %add9 = add nuw nsw i64 %and8, %and6
+  %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
+  %shr11 = lshr i64 %add9, 8
+  %and12 = and i64 %shr11, 71777214294589695
+  %add13 = add nuw nsw i64 %and12, %and10
+  %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+  %shr15 = lshr i64 %add13, 16
+  %and16 = and i64 %shr15, 281470681808895
+  %add17 = add nuw nsw i64 %and16, %and14
+  %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i64 %add17, 32
+  %and20 = and i64 %shr19, 4294967295
+  %add21 = add nuw nsw i64 %and18, %and20
+  ret i64 %add21
+}
+
+; Vector variant: 64-bit popcount pattern (Hacker's Delight)
+define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
+; CHECK-LABEL: @popcnt1_64vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[UWORD:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %and = and <2 x i64> %uWord, <i64 6148914691236517205, i64 6148914691236517205>
+  %shr = lshr <2 x i64> %uWord, <i64 1, i64 1>
+  %and1 = and <2 x i64> %shr, <i64 6148914691236517205, i64 6148914691236517205>
+  %add = add nuw <2 x i64> %and1, %and
+  %and2 = and <2 x i64> %add, <i64 3689348814741910323, i64 3689348814741910323>
+  %shr3 = lshr <2 x i64> %add, <i64 2, i64 2>
+  %and4 = and <2 x i64> %shr3, <i64 3689348814741910323, i64 3689348814741910323>
+  %add5 = add nuw nsw <2 x i64> %and4, %and2
+  %and6 = and <2 x i64> %add5, <i64 1085102592571150095, i64 1085102592571150095>
+  %shr7 = lshr <2 x i64> %add5, <i64 4, i64 4>
+  %and8 = and <2 x i64> %shr7, <i64 1085102592571150095, i64 1085102592571150095>
+  %add9 = add nuw nsw <2 x i64> %and8, %and6
+  %and10 = and <2 x i64> %add9, <i64 71777214294589695, i64 71777214294589695>
+  %shr11 = lshr <2 x i64> %add9, <i64 8, i64 8>
+  %and12 = and <2 x i64> %shr11, <i64 71777214294589695, i64 71777214294589695>
+  %add13 = add nuw nsw <2 x i64> %and12, %and10
+  %and14 = and <2 x i64> %add13, <i64 281470681808895, i64 281470681808895>
+  %shr15 = lshr <2 x i64> %add13, <i64 16, i64 16>
+  %and16 = and <2 x i64> %shr15, <i64 281470681808895, i64 281470681808895>
+  %add17 = add nuw nsw <2 x i64> %and16, %and14
+  %and18 = and <2 x i64> %add17, <i64 4294967295, i64 4294967295>
+  %shr19 = lshr <2 x i64> %add17, <i64 32, i64 32>
+  %add20 = add nuw nsw <2 x i64> %and18, %shr19
+  ret <2 x i64> %add20
+}

>From 796bcf76230e397aec6c70d6ad0ad77e8840dee5 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 12 Mar 2026 22:10:05 +0530
Subject: [PATCH 4/9] Fix review comments

---
 .../AggressiveInstCombine.cpp                 | 55 ++++++++-----------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c43f397d8b562..948e09674293b 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -407,7 +407,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
   APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
   APInt Mask00FF;
-  if (Len <= 16) {
+  if (Len == 16) {
     Mask00FF = APInt(16, 0x00FF);
   } else {
     Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
@@ -426,7 +426,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   // 0x00000000FFFFFFFF)".
   Value *ShiftOp;
   Value *Start = &I;
-  bool is64 = false;
+  bool Is64 = false;
   if (match(Start,
             m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
                           m_SpecificInt(Mask64)),
@@ -435,64 +435,55 @@ static bool tryToRecognizePopCount1(Instruction &I) {
             m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
                     m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
     Start = ShiftOp;
-    is64 = true;
+    Is64 = true;
   }
   Value *LShrOp0;
   // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
   // Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
-  bool test16 = match(
+  bool Test16 = match(
       Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
                            m_SpecificInt(Mask0000FFFF)),
                      m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
 
-  bool is32 = false;
-  if ((is64 && test16) ||
-      (!is64 && Len == 32 &&
-       (test16 ||
+  bool Is32 = false;
+  if ((Is64 && Test16) ||
+      (!Is64 && Len == 32 &&
+       (Test16 ||
         match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
                              m_And(m_Deferred(LShrOp0),
                                    m_SpecificInt(Mask0000FFFF))))))) {
     Start = LShrOp0;
-
-    is32 = true;
+    Is32 = true;
   }
   Value *ShiftOp0;
   // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
   // OR
   // Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8) ;".
-  bool test8 = match(
+  bool Test8 = match(
       Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
                            m_SpecificInt(Mask00FF)),
                      m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
-
-  bool is16 = false;
-  if ((is32 && test8) ||
-      (!is32 && Len == 16 &&
-       (test8 ||
-        match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
-                             m_And(m_Deferred(ShiftOp0),
-                                   m_SpecificInt(Mask00FF))))))) {
-    Start = ShiftOp0;
-    is16 = true;
+  if (!((Is32 && Test8) ||
+        (!Is32 && Len == 16 &&
+         (Test8 ||
+          match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
+                               m_And(m_Deferred(ShiftOp0),
+                                     m_SpecificInt(Mask00FF)))))))) {
+    return false;
   }
 
   Value *ShiftOp1;
   // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
-  bool test4 =
-      match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
-                                 m_SpecificInt(Mask0F)),
-                           m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))));
-
-  bool is8 = false;
-  if ((is16 && test4)) {
-    Start = ShiftOp1;
-    is8 = true;
+  if (!match(ShiftOp0,
+             m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
+                           m_SpecificInt(Mask0F)),
+                     m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))))) {
+    return false;
   }
 
   Value *ShiftOp2;
   // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
-  if (is8 &&
-      match(Start,
+  if (match(ShiftOp1,
             m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
                           m_SpecificInt(Mask33)),
                     m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {

>From 7e761ed69d19f472ba4e8fb0c60674781a737ded Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 12 Mar 2026 22:52:56 +0530
Subject: [PATCH 5/9] Add negative test cases.

---
 .../AggressiveInstCombine/popcount.ll         | 492 ++++++++++++++++++
 1 file changed, 492 insertions(+)

diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f161b47428329..dfcaa89fd8cf2 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -503,3 +503,495 @@ define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
   %add20 = add nuw nsw <2 x i64> %and18, %shr19
   ret <2 x i64> %add20
 }
+
+; Negative test cases - these should NOT be optimized to llvm.ctpop
+
+; NEGATIVE: i8 type - too small (Len <= 8), rejected by the bit-width check in tryToRecognizePopCount1
+define i8 @popcnt1_8_negative(i8 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_8_negative(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[UWORD:%.*]], 85
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i8 [[SHR]], 85
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i8 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i8 [[ADD]], 51
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i8 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i8 [[SHR3]], 51
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i8 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i8 [[ADD5]], 15
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i8 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i8 [[SHR7]], 15
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i8 [[AND8]], [[AND6]]
+; CHECK-NEXT:    ret i8 [[ADD9]]
+;
+  %and = and i8 %uWord, 85 ; 0x55
+  %shr = lshr i8 %uWord, 1
+  %and1 = and i8 %shr, 85
+  %add = add nuw i8 %and1, %and
+  %and2 = and i8 %add, 51 ; 0x33
+  %shr3 = lshr i8 %add, 2
+  %and4 = and i8 %shr3, 51
+  %add5 = add nuw nsw i8 %and4, %and2
+  %and6 = and i8 %add5, 15 ; 0x0F
+  %shr7 = lshr i8 %add5, 4
+  %and8 = and i8 %shr7, 15
+  %add9 = add nuw nsw i8 %and8, %and6
+  ret i8 %add9
+}
+
+; NEGATIVE: i128 type - too large; expected to be rejected by the bit-width check in tryToRecognizePopCount1
+define i128 @popcnt1_128_negative(i128 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_128_negative(
+; CHECK-NEXT:    [[AND:%.*]] = and i128 [[UWORD:%.*]], 113427455640312821154458202477256070485
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i128 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i128 [[SHR]], 113427455640312821154458202477256070485
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i128 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i128 [[ADD]], 68056473384187692692674921486353642291
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i128 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i128 [[SHR3]], 68056473384187692692674921486353642291
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i128 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i128 [[ADD5]], 20016609818878733144904388672456953615
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i128 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i128 [[SHR7]], 20016609818878733144904388672456953615
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i128 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i128 [[ADD9]], 1324055902416102970674609367438786815
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i128 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i128 [[SHR11]], 1324055902416102970674609367438786815
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i128 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i128 [[ADD13]], 5192217631581220737344928932233215
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i128 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i128 [[SHR15]], 5192217631581220737344928932233215
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i128 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i128 [[ADD17]], 79228162495817593524129366015
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i128 [[ADD17]], 32
+; CHECK-NEXT:    [[AND20:%.*]] = and i128 [[SHR19]], 79228162495817593524129366015
+; CHECK-NEXT:    [[ADD21:%.*]] = add nuw nsw i128 [[AND18]], [[AND20]]
+; CHECK-NEXT:    [[AND22:%.*]] = and i128 [[ADD21]], 18446744073709551615
+; CHECK-NEXT:    [[SHR23:%.*]] = lshr i128 [[ADD21]], 64
+; CHECK-NEXT:    [[ADD24:%.*]] = add nuw nsw i128 [[AND22]], [[SHR23]]
+; CHECK-NEXT:    ret i128 [[ADD24]]
+;
+  %and = and i128 %uWord, 113427455640312821154458202477256070485 ; 0x55555555555555555555555555555555
+  %shr = lshr i128 %uWord, 1
+  %and1 = and i128 %shr, 113427455640312821154458202477256070485
+  %add = add nuw i128 %and1, %and
+  %and2 = and i128 %add, 68056473384187692692674921486353642291 ; 0x33333333333333333333333333333333
+  %shr3 = lshr i128 %add, 2
+  %and4 = and i128 %shr3, 68056473384187692692674921486353642291
+  %add5 = add nuw nsw i128 %and4, %and2
+  %and6 = and i128 %add5, 20016609818878733144904388672456953615 ; 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
+  %shr7 = lshr i128 %add5, 4
+  %and8 = and i128 %shr7, 20016609818878733144904388672456953615
+  %add9 = add nuw nsw i128 %and8, %and6
+  %and10 = and i128 %add9, 1324055902416102970674609367438786815 ; 0x00FF00FF00FF00FF00FF00FF00FF00FF
+  %shr11 = lshr i128 %add9, 8
+  %and12 = and i128 %shr11, 1324055902416102970674609367438786815
+  %add13 = add nuw nsw i128 %and12, %and10
+  %and14 = and i128 %add13, 5192217631581220737344928932233215 ; 0x0000FFFF0000FFFF0000FFFF0000FFFF
+  %shr15 = lshr i128 %add13, 16
+  %and16 = and i128 %shr15, 5192217631581220737344928932233215
+  %add17 = add nuw nsw i128 %and16, %and14
+  %and18 = and i128 %add17, 79228162495817593524129366015 ; 0x00000000FFFFFFFF00000000FFFFFFFF
+  %shr19 = lshr i128 %add17, 32
+  %and20 = and i128 %shr19, 79228162495817593524129366015
+  %add21 = add nuw nsw i128 %and18, %and20
+  %and22 = and i128 %add21, 18446744073709551615 ; 0x0000000000000000FFFFFFFFFFFFFFFF
+  %shr23 = lshr i128 %add21, 64
+  %add24 = add nuw nsw i128 %and22, %shr23
+  ret i128 %add24
+}
+
+; NEGATIVE: Missing the 0x55 mask step - incomplete pattern, the innermost (0x55) match fails
+define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_missing_step1(
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[UWORD:%.*]], 858993459
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[UWORD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i32 [[ADD5]], 252645135
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[SHR7]], 252645135
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD9]], 16711935
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    ret i32 [[ADD16]]
+;
+  ; Missing: %and = and i32 %uWord, 1431655765
+  ; Missing: %shr = lshr i32 %uWord, 1
+  ; Missing: %and1 = and i32 %shr, 1431655765
+  ; Missing: %add = add nuw i32 %and1, %and
+  ; Starting from step 2 (0x33 mask)
+  %and2 = and i32 %uWord, 858993459
+  %shr3 = lshr i32 %uWord, 2
+  %and4 = and i32 %shr3, 858993459
+  %add5 = add nuw nsw i32 %and4, %and2
+  %and6 = and i32 %add5, 252645135
+  %shr7 = lshr i32 %add5, 4
+  %and8 = and i32 %shr7, 252645135
+  %add9 = add nuw nsw i32 %and8, %and6
+  %and10 = and i32 %add9, 16711935
+  %shr11 = lshr i32 %add9, 8
+  %and12 = and i32 %shr11, 16711935
+  %add13 = add nuw nsw i32 %and12, %and10
+  %and14 = and i32 %add13, 65535
+  %shr15 = lshr i32 %add13, 16
+  %add16 = add nuw nsw i32 %and14, %shr15
+  ret i32 %add16
+}
+
+; NEGATIVE: Missing the 0x0F mask step - incomplete pattern, the 0x0F match fails
+define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_missing_step3(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[UWORD:%.*]], 1431655765
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[ADD]], 858993459
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD5]], 16711935
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD5]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    ret i32 [[ADD16]]
+;
+  %and = and i32 %uWord, 1431655765
+  %shr = lshr i32 %uWord, 1
+  %and1 = and i32 %shr, 1431655765
+  %add = add nuw i32 %and1, %and
+  %and2 = and i32 %add, 858993459
+  %shr3 = lshr i32 %add, 2
+  %and4 = and i32 %shr3, 858993459
+  %add5 = add nuw nsw i32 %and4, %and2
+  ; Missing: %and6 = and i32 %add5, 252645135
+  ; Missing: %shr7 = lshr i32 %add5, 4
+  ; Missing: %and8 = and i32 %shr7, 252645135
+  ; Missing: %add9 = add nuw nsw i32 %and8, %and6
+  ; Jumping to step 4 (0x00FF00FF mask)
+  %and10 = and i32 %add5, 16711935
+  %shr11 = lshr i32 %add5, 8
+  %and12 = and i32 %shr11, 16711935
+  %add13 = add nuw nsw i32 %and12, %and10
+  %and14 = and i32 %add13, 65535
+  %shr15 = lshr i32 %add13, 16
+  %add16 = add nuw nsw i32 %and14, %shr15
+  ret i32 %add16
+}
+
+; NEGATIVE: Wrong mask value - using 0x44444444 instead of 0x55555555
+define i32 @popcnt1_32_wrong_mask(i32 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_32_wrong_mask(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[UWORD:%.*]], 1145324612
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[SHR]], 1145324612
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[ADD]], 858993459
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i32 [[ADD5]], 252645135
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[SHR7]], 252645135
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD9]], 16711935
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
+; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    ret i32 [[ADD16]]
+;
+  %and = and i32 %uWord, 1145324612 ; 0x44444444 instead of 0x55555555
+  %shr = lshr i32 %uWord, 1
+  %and1 = and i32 %shr, 1145324612
+  %add = add nuw i32 %and1, %and
+  %and2 = and i32 %add, 858993459
+  %shr3 = lshr i32 %add, 2
+  %and4 = and i32 %shr3, 858993459
+  %add5 = add nuw nsw i32 %and4, %and2
+  %and6 = and i32 %add5, 252645135
+  %shr7 = lshr i32 %add5, 4
+  %and8 = and i32 %shr7, 252645135
+  %add9 = add nuw nsw i32 %and8, %and6
+  %and10 = and i32 %add9, 16711935
+  %shr11 = lshr i32 %add9, 8
+  %and12 = and i32 %shr11, 16711935
+  %add13 = add nuw nsw i32 %and12, %and10
+  %and14 = and i32 %add13, 65535
+  %shr15 = lshr i32 %add13, 16
+  %add16 = add nuw nsw i32 %and14, %shr15
+  ret i32 %add16
+}
+
+; NEGATIVE: 64-bit - Missing the 0x55 mask step - incomplete pattern
+define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_missing_step1(
+; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[UWORD:%.*]], 3689348814741910323
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[UWORD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    ret i64 [[ADD20]]
+;
+  ; Missing: %and = and i64 %uWord, 6148914691236517205
+  ; Missing: %shr = lshr i64 %uWord, 1
+  ; Missing: %and1 = and i64 %shr, 6148914691236517205
+  ; Missing: %add = add nuw i64 %and1, %and
+  ; Starting from step 2 (0x33 mask)
+  %and2 = and i64 %uWord, 3689348814741910323
+  %shr3 = lshr i64 %uWord, 2
+  %and4 = and i64 %shr3, 3689348814741910323
+  %add5 = add nuw nsw i64 %and4, %and2
+  %and6 = and i64 %add5, 1085102592571150095
+  %shr7 = lshr i64 %add5, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %add9 = add nuw nsw i64 %and8, %and6
+  %and10 = and i64 %add9, 71777214294589695
+  %shr11 = lshr i64 %add9, 8
+  %and12 = and i64 %shr11, 71777214294589695
+  %add13 = add nuw nsw i64 %and12, %and10
+  %and14 = and i64 %add13, 281470681808895
+  %shr15 = lshr i64 %add13, 16
+  %and16 = and i64 %shr15, 281470681808895
+  %add17 = add nuw nsw i64 %and16, %and14
+  %and18 = and i64 %add17, 4294967295
+  %shr19 = lshr i64 %add17, 32
+  %add20 = add nuw nsw i64 %and18, %shr19
+  ret i64 %add20
+}
+
+; NEGATIVE: 64-bit - Missing the 0x0F mask step - incomplete pattern
+define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_missing_step3(
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[UWORD:%.*]], 6148914691236517205
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[SHR]], 6148914691236517205
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD5]], 71777214294589695
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD5]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    ret i64 [[ADD20]]
+;
+  %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
+  %shr = lshr i64 %uWord, 1
+  %and1 = and i64 %shr, 6148914691236517205
+  %add = add nuw i64 %and1, %and
+  %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+  %shr3 = lshr i64 %add, 2
+  %and4 = and i64 %shr3, 3689348814741910323
+  %add5 = add nuw nsw i64 %and4, %and2
+  ; Missing: %and6 = and i64 %add5, 1085102592571150095
+  ; Missing: %shr7 = lshr i64 %add5, 4
+  ; Missing: %and8 = and i64 %shr7, 1085102592571150095
+  ; Missing: %add9 = add nuw nsw i64 %and8, %and6
+  ; Jumping to step 4 (0x00FF00FF00FF00FF mask), fed directly from step 2
+  %and10 = and i64 %add5, 71777214294589695 ; 0x00FF00FF00FF00FF
+  %shr11 = lshr i64 %add5, 8
+  %and12 = and i64 %shr11, 71777214294589695
+  %add13 = add nuw nsw i64 %and12, %and10
+  %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+  %shr15 = lshr i64 %add13, 16
+  %and16 = and i64 %shr15, 281470681808895
+  %add17 = add nuw nsw i64 %and16, %and14
+  %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i64 %add17, 32
+  %add20 = add nuw nsw i64 %and18, %shr19
+  ret i64 %add20
+}
+
+; NEGATIVE: 64-bit - Wrong mask value - using 0x4444444444444444 instead of 0x5555555555555555
+define i64 @popcnt1_64_wrong_mask(i64 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_64_wrong_mask(
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[UWORD:%.*]], 4919131752989213764
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[SHR]], 4919131752989213764
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    ret i64 [[ADD20]]
+;
+  %and = and i64 %uWord, 4919131752989213764 ; 0x4444444444444444 instead of 0x5555555555555555
+  %shr = lshr i64 %uWord, 1
+  %and1 = and i64 %shr, 4919131752989213764
+  %add = add nuw i64 %and1, %and
+  %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
+  %shr3 = lshr i64 %add, 2
+  %and4 = and i64 %shr3, 3689348814741910323
+  %add5 = add nuw nsw i64 %and4, %and2
+  %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
+  %shr7 = lshr i64 %add5, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %add9 = add nuw nsw i64 %and8, %and6
+  %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
+  %shr11 = lshr i64 %add9, 8
+  %and12 = and i64 %shr11, 71777214294589695
+  %add13 = add nuw nsw i64 %and12, %and10
+  %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
+  %shr15 = lshr i64 %add13, 16
+  %and16 = and i64 %shr15, 281470681808895
+  %add17 = add nuw nsw i64 %and16, %and14
+  %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i64 %add17, 32
+  %add20 = add nuw nsw i64 %and18, %shr19
+  ret i64 %add20
+}
+
+; NEGATIVE: 16-bit - Missing the 0x55 mask step - incomplete pattern
+define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_missing_step1(
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[UWORD:%.*]], 13107
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[UWORD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i16 [[ADD5]], 3855
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i16 [[SHR7]], 3855
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD9]], 255
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    ret i16 [[ADD13]]
+;
+  ; Missing: %and = and i16 %uWord, 21845
+  ; Missing: %shr = lshr i16 %uWord, 1
+  ; Missing: %and1 = and i16 %shr, 21845
+  ; Missing: %add = add nuw i16 %and1, %and
+  ; Starting from step 2 (0x3333 mask), applied directly to %uWord
+  %and2 = and i16 %uWord, 13107 ; 0x3333
+  %shr3 = lshr i16 %uWord, 2
+  %and4 = and i16 %shr3, 13107
+  %add5 = add nuw nsw i16 %and4, %and2
+  %and6 = and i16 %add5, 3855 ; 0x0F0F
+  %shr7 = lshr i16 %add5, 4
+  %and8 = and i16 %shr7, 3855
+  %add9 = add nuw nsw i16 %and8, %and6
+  %and10 = and i16 %add9, 255 ; 0x00FF
+  %shr11 = lshr i16 %add9, 8
+  %add13 = add nuw nsw i16 %shr11, %and10
+  ret i16 %add13
+}
+
+; NEGATIVE: 16-bit - Missing the 0x0F mask step - incomplete pattern
+define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_missing_step3(
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[UWORD:%.*]], 21845
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[SHR]], 21845
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[ADD]], 13107
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD5]], 255
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD5]], 8
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    ret i16 [[ADD13]]
+;
+  %and = and i16 %uWord, 21845 ; 0x5555
+  %shr = lshr i16 %uWord, 1
+  %and1 = and i16 %shr, 21845
+  %add = add nuw i16 %and1, %and
+  %and2 = and i16 %add, 13107 ; 0x3333
+  %shr3 = lshr i16 %add, 2
+  %and4 = and i16 %shr3, 13107
+  %add5 = add nuw nsw i16 %and4, %and2
+  ; Missing: %and6 = and i16 %add5, 3855
+  ; Missing: %shr7 = lshr i16 %add5, 4
+  ; Missing: %and8 = and i16 %shr7, 3855
+  ; Missing: %add9 = add nuw nsw i16 %and8, %and6
+  ; Jumping to step 4 (0x00FF mask), fed directly from step 2
+  %and10 = and i16 %add5, 255 ; 0x00FF
+  %shr11 = lshr i16 %add5, 8
+  %add13 = add nuw nsw i16 %shr11, %and10
+  ret i16 %add13
+}
+
+; NEGATIVE: 16-bit - Wrong mask value - using 0x4444 instead of 0x5555
+define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_16_wrong_mask(
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[UWORD:%.*]], 17476
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[SHR]], 17476
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[ADD]], 13107
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i16 [[ADD5]], 3855
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i16 [[SHR7]], 3855
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD9]], 255
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    ret i16 [[ADD13]]
+;
+  %and = and i16 %uWord, 17476 ; 0x4444 instead of 0x5555
+  %shr = lshr i16 %uWord, 1
+  %and1 = and i16 %shr, 17476
+  %add = add nuw i16 %and1, %and
+  %and2 = and i16 %add, 13107 ; 0x3333
+  %shr3 = lshr i16 %add, 2
+  %and4 = and i16 %shr3, 13107
+  %add5 = add nuw nsw i16 %and4, %and2
+  %and6 = and i16 %add5, 3855 ; 0x0F0F
+  %shr7 = lshr i16 %add5, 4
+  %and8 = and i16 %shr7, 3855
+  %add9 = add nuw nsw i16 %and8, %and6
+  %and10 = and i16 %add9, 255 ; 0x00FF
+  %shr11 = lshr i16 %add9, 8
+  %add13 = add nuw nsw i16 %shr11, %and10
+  ret i16 %add13
+}

>From 52917117be6236481185aba8d15e66073de2c18d Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 23 Mar 2026 11:43:28 +0530
Subject: [PATCH 6/9] Implement code review comments

---
 .../AggressiveInstCombine.cpp                 | 132 +++++++-----------
 1 file changed, 53 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 948e09674293b..1dd0d8fd79064 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -373,7 +373,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
 }
 
 // Try to recognize below function as popcount intrinsic.
-// https://doc.lagout.org/security/Hackers%20Delight.pdf
+// Ref. Hacker Delights
 // Also used in TargetLowering::expandCTPOP().
 //
 // int popcount32(unsigned int i) {
@@ -388,7 +388,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
 // uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
 // uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
 // uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
-// return  (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// uWord =  (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
 // return  (uWord & 0x00000000FFFFFFFF) + (uWord>>32) & 0x00000000FFFFFFFF;
 // }
 static bool tryToRecognizePopCount1(Instruction &I) {
@@ -400,7 +400,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
     return false;
 
   unsigned Len = Ty->getScalarSizeInBits();
-  if (!(Len <= 64 && Len > 8 && Len % 8 == 0))
+  if (Len > 64 || Len <= 8 || Len % 8 != 0)
     return false;
 
   APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
@@ -420,90 +420,64 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   }
 
   APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
-  // Matching "(uWord & 0x00000000FFFFFFFF) + (uWord>>32)".
-  // OR
-  // Matching "(uWord & 0x00000000FFFFFFFF) + ((uWord>>32) &
-  // 0x00000000FFFFFFFF)".
   Value *ShiftOp;
   Value *Start = &I;
-  bool Is64 = false;
-  if (match(Start,
-            m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
-                          m_SpecificInt(Mask64)),
-                    m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64)))) ||
-      match(Start,
-            m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(32)),
-                    m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask64))))) {
+  APInt Mask;
+  for (unsigned I = Len; I >= 8; I = I / 2) {
+    switch (I) {
+    case 64:
+      Mask = Mask64;
+      break;
+    case 32:
+      Mask = Mask0000FFFF;
+      break;
+    case 16:
+      Mask = Mask00FF;
+      break;
+    case 8:
+      Mask = Mask0F;
+      break;
+    }
+    // Matching "(uWord & Mask) + (uWord>>I/2)".
+    // OR
+    // Matching "(uWord & Mask) + ((uWord>>I/2) &
+    // 0x00000000FFFFFFFF)".
+    if (Len >= I &&
+        !(match(Start,
+                m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+                              m_SpecificInt(Mask)),
+                        m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
+          match(Start,
+                m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+                        m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask))))))
+      return false;
+
     Start = ShiftOp;
-    Is64 = true;
-  }
-  Value *LShrOp0;
-  // Matching "(uWord & 0x0000FFFF) + (uWord>>16)".
-  // Matching "(uWord & 0x0000FFFF) + ((uWord>>16) & 0x0000FFFF)".
-  bool Test16 = match(
-      Start, m_c_Add(m_And(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
-                           m_SpecificInt(Mask0000FFFF)),
-                     m_And(m_Deferred(LShrOp0), m_SpecificInt(Mask0000FFFF))));
-
-  bool Is32 = false;
-  if ((Is64 && Test16) ||
-      (!Is64 && Len == 32 &&
-       (Test16 ||
-        match(Start, m_c_Add(m_LShr(m_Value(LShrOp0), m_SpecificInt(16)),
-                             m_And(m_Deferred(LShrOp0),
-                                   m_SpecificInt(Mask0000FFFF))))))) {
-    Start = LShrOp0;
-    Is32 = true;
-  }
-  Value *ShiftOp0;
-  // Matching "uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);".
-  // OR
-  // Matching "uWord = (uWord & 0x00FF00FF) + (uWord>>8) ;".
-  bool Test8 = match(
-      Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
-                           m_SpecificInt(Mask00FF)),
-                     m_And(m_Deferred(ShiftOp0), m_SpecificInt(Mask00FF))));
-  if (!((Is32 && Test8) ||
-        (!Is32 && Len == 16 &&
-         (Test8 ||
-          match(Start, m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(8)),
-                               m_And(m_Deferred(ShiftOp0),
-                                     m_SpecificInt(Mask00FF)))))))) {
-    return false;
+    ShiftOp = nullptr;
   }
 
-  Value *ShiftOp1;
-  // Matching "uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F)".
-  if (!match(ShiftOp0,
-             m_c_Add(m_And(m_LShr(m_Value(ShiftOp1), m_SpecificInt(4)),
-                           m_SpecificInt(Mask0F)),
-                     m_And(m_Deferred(ShiftOp1), m_SpecificInt(Mask0F))))) {
+  ShiftOp = nullptr;
+  // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
+                                  m_SpecificInt(Mask33)),
+                            m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask33)))))
     return false;
-  }
 
-  Value *ShiftOp2;
-  // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
-  if (match(ShiftOp1,
-            m_c_Add(m_And(m_LShr(m_Value(ShiftOp2), m_SpecificInt(2)),
-                          m_SpecificInt(Mask33)),
-                    m_And(m_Deferred(ShiftOp2), m_SpecificInt(Mask33))))) {
-    Value *ShiftOp3;
-    // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
-    // 0x55555555)".
-    if (match(ShiftOp2,
-              m_c_Add(m_And(m_LShr(m_Value(ShiftOp3), m_SpecificInt(1)),
-                            m_SpecificInt(Mask55)),
-                      m_And(m_Deferred(ShiftOp3), m_SpecificInt(Mask55))))) {
-      LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
-      IRBuilder<> Builder(&I);
-      I.replaceAllUsesWith(
-          Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp3}));
-      ++NumPopCountRecognized;
-      return true;
-    }
-  }
+  Start = ShiftOp;
+  ShiftOp = nullptr;
+  // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
+  // 0x55555555)".
+  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
+                                  m_SpecificInt(Mask55)),
+                            m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))
+    return false;
 
-  return false;
+  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+  IRBuilder<> Builder(&I);
+  I.replaceAllUsesWith(
+      Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp}));
+  ++NumPopCountRecognized;
+  return true;
 }
 
 /// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and

>From a5707417f1ed783fb31c5db5ad12d290576dfc79 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 24 Mar 2026 18:05:50 +0530
Subject: [PATCH 7/9] Add the power of 2 check

---
 .../AggressiveInstCombine.cpp                 |   4 +
 .../AggressiveInstCombine/popcount.ll         | 208 ++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 1dd0d8fd79064..41a138974a4ee 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -403,6 +403,10 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   if (Len > 64 || Len <= 8 || Len % 8 != 0)
     return false;
 
+  // Len should be a power of 2 for the loop to work correctly
+  if (!isPowerOf2_32(Len))
+    return false;
+
   APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
   APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
   APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index dfcaa89fd8cf2..21167abbcef8f 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -995,3 +995,211 @@ define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
   %add13 = add nuw nsw i16 %shr11, %and10
   ret i16 %add13
 }
+
+; NEGATIVE: 24-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i24 @popcnt1_negative_i24(i24 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i24(
+; CHECK-NEXT:    [[AND:%.*]] = and i24 [[UWORD:%.*]], 5592405
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i24 [[SHR]], 5592405
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i24 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i24 [[ADD]], 3355443
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i24 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i24 [[SHR3]], 3355443
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i24 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i24 [[ADD5]], 986895
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i24 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i24 [[SHR7]], 986895
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i24 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i24 [[ADD9]], 65535
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i24 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i24 [[SHR11]], 65535
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i24 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i24 [[ADD13]], 255
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i24 [[ADD13]], 16
+; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i24 [[SHR15]], [[AND14]]
+; CHECK-NEXT:    ret i24 [[ADD16]]
+;
+  %and = and i24 %uWord, 5592405 ; 0x555555
+  %shr = lshr i24 %uWord, 1
+  %and1 = and i24 %shr, 5592405
+  %add = add nuw i24 %and1, %and
+  %and2 = and i24 %add, 3355443 ; 0x333333
+  %shr3 = lshr i24 %add, 2
+  %and4 = and i24 %shr3, 3355443
+  %add5 = add nuw nsw i24 %and4, %and2
+  %and6 = and i24 %add5, 986895 ; 0x0F0F0F
+  %shr7 = lshr i24 %add5, 4
+  %and8 = and i24 %shr7, 986895
+  %add9 = add nuw nsw i24 %and8, %and6
+  %and10 = and i24 %add9, 65535 ; 0x00FFFF (NOTE: not a 0x00FF splat, which would be 0xFF00FF)
+  %shr11 = lshr i24 %add9, 8
+  %and12 = and i24 %shr11, 65535
+  %add13 = add nuw nsw i24 %and12, %and10
+  %and14 = and i24 %add13, 255 ; 0x0000FF
+  %shr15 = lshr i24 %add13, 16
+  %add16 = add nuw nsw i24 %shr15, %and14
+  ret i24 %add16
+}
+
+; NEGATIVE: 40-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i40 @popcnt1_negative_i40(i40 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i40(
+; CHECK-NEXT:    [[AND:%.*]] = and i40 [[UWORD:%.*]], 366503875925
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i40 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i40 [[SHR]], 366503875925
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i40 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i40 [[ADD]], 219902325555
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i40 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i40 [[SHR3]], 219902325555
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i40 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i40 [[ADD5]], 67818775935
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i40 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i40 [[SHR7]], 67818775935
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i40 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i40 [[ADD9]], 280371153695
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i40 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i40 [[SHR11]], 280371153695
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i40 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i40 [[ADD13]], -1
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i40 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i40 [[SHR15]], -1
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i40 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i40 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i40 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i40 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    ret i40 [[ADD20]]
+;
+  %and = and i40 %uWord, 366503875925 ; 0x5555555555
+  %shr = lshr i40 %uWord, 1
+  %and1 = and i40 %shr, 366503875925
+  %add = add nuw i40 %and1, %and
+  %and2 = and i40 %add, 219902325555 ; 0x3333333333
+  %shr3 = lshr i40 %add, 2
+  %and4 = and i40 %shr3, 219902325555
+  %add5 = add nuw nsw i40 %and4, %and2
+  %and6 = and i40 %add5, 67818775935 ; 0x0FCA50657F (NOTE: not 0x0F0F0F0F0F)
+  %shr7 = lshr i40 %add5, 4
+  %and8 = and i40 %shr7, 67818775935
+  %add9 = add nuw nsw i40 %and8, %and6
+  %and10 = and i40 %add9, 280371153695 ; 0x41476C4B1F (NOTE: not a 0x00FF splat)
+  %shr11 = lshr i40 %add9, 8
+  %and12 = and i40 %shr11, 280371153695
+  %add13 = add nuw nsw i40 %and12, %and10
+  %and14 = and i40 %add13, 1099511627775 ; 0xFFFFFFFFFF, all ones (NOTE: not a 0x0000FFFF splat)
+  %shr15 = lshr i40 %add13, 16
+  %and16 = and i40 %shr15, 1099511627775
+  %add17 = add nuw nsw i40 %and16, %and14
+  %and18 = and i40 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i40 %add17, 32
+  %add20 = add nuw nsw i40 %shr19, %and18
+  ret i40 %add20
+}
+
+; NEGATIVE: 48-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i48 @popcnt1_negative_i48(i48 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i48(
+; CHECK-NEXT:    [[AND:%.*]] = and i48 [[UWORD:%.*]], 93824992236885
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i48 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i48 [[SHR]], 93824992236885
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i48 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i48 [[ADD]], 56294995342131
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i48 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i48 [[SHR3]], 56294995342131
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i48 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i48 [[ADD5]], 17361641481615
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i48 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i48 [[SHR7]], 17361641481615
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i48 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i48 [[ADD9]], 71777214294895
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i48 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i48 [[SHR11]], 71777214294895
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i48 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i48 [[ADD13]], -4294967041
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i48 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i48 [[SHR15]], -4294967041
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i48 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i48 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i48 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i48 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    ret i48 [[ADD20]]
+;
+  %and = and i48 %uWord, 93824992236885 ; 0x555555555555
+  %shr = lshr i48 %uWord, 1
+  %and1 = and i48 %shr, 93824992236885
+  %add = add nuw i48 %and1, %and
+  %and2 = and i48 %add, 56294995342131 ; 0x333333333333
+  %shr3 = lshr i48 %add, 2
+  %and4 = and i48 %shr3, 56294995342131
+  %add5 = add nuw nsw i48 %and4, %and2
+  %and6 = and i48 %add5, 17361641481615 ; 0x0FCA5279258F (NOTE: not 0x0F0F0F0F0F0F)
+  %shr7 = lshr i48 %add5, 4
+  %and8 = and i48 %shr7, 17361641481615
+  %add9 = add nuw nsw i48 %and8, %and6
+  %and10 = and i48 %add9, 71777214294895 ; 0x4147EF5C6B6F (NOTE: not 0x00FF00FF00FF)
+  %shr11 = lshr i48 %add9, 8
+  %and12 = and i48 %shr11, 71777214294895
+  %add13 = add nuw nsw i48 %and12, %and10
+  %and14 = and i48 %add13, 281470681743615 ; 0xFFFF000000FF (NOTE: not a 0x0000FFFF splat)
+  %shr15 = lshr i48 %add13, 16
+  %and16 = and i48 %shr15, 281470681743615
+  %add17 = add nuw nsw i48 %and16, %and14
+  %and18 = and i48 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i48 %add17, 32
+  %add20 = add nuw nsw i48 %shr19, %and18
+  ret i48 %add20
+}
+
+; NEGATIVE: 56-bit popcount pattern - non-power-of-2 bit width (should NOT optimize)
+define i56 @popcnt1_negative_i56(i56 noundef %uWord) {
+; CHECK-LABEL: @popcnt1_negative_i56(
+; CHECK-NEXT:    [[AND:%.*]] = and i56 [[UWORD:%.*]], 24019198012642645
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i56 [[UWORD]], 1
+; CHECK-NEXT:    [[AND1:%.*]] = and i56 [[SHR]], 24019198012642645
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i56 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i56 [[ADD]], 14411518807585587
+; CHECK-NEXT:    [[SHR3:%.*]] = lshr i56 [[ADD]], 2
+; CHECK-NEXT:    [[AND4:%.*]] = and i56 [[SHR3]], 14411518807585587
+; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i56 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[AND6:%.*]] = and i56 [[ADD5]], 4444132406286415
+; CHECK-NEXT:    [[SHR7:%.*]] = lshr i56 [[ADD5]], 4
+; CHECK-NEXT:    [[AND8:%.*]] = and i56 [[SHR7]], 4444132406286415
+; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i56 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[AND10:%.*]] = and i56 [[ADD9]], 18374686479671535
+; CHECK-NEXT:    [[SHR11:%.*]] = lshr i56 [[ADD9]], 8
+; CHECK-NEXT:    [[AND12:%.*]] = and i56 [[SHR11]], 18374686479671535
+; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i56 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[AND14:%.*]] = and i56 [[ADD13]], -4278190081
+; CHECK-NEXT:    [[SHR15:%.*]] = lshr i56 [[ADD13]], 16
+; CHECK-NEXT:    [[AND16:%.*]] = and i56 [[SHR15]], -4278190081
+; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i56 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[AND18:%.*]] = and i56 [[ADD17]], 4294967295
+; CHECK-NEXT:    [[SHR19:%.*]] = lshr i56 [[ADD17]], 32
+; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i56 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    ret i56 [[ADD20]]
+;
+  %and = and i56 %uWord, 24019198012642645 ; 0x55555555555555
+  %shr = lshr i56 %uWord, 1
+  %and1 = and i56 %shr, 24019198012642645
+  %add = add nuw i56 %and1, %and
+  %and2 = and i56 %add, 14411518807585587 ; 0x33333333333333
+  %shr3 = lshr i56 %add, 2
+  %and4 = and i56 %shr3, 14411518807585587
+  %add5 = add nuw nsw i56 %and4, %and2
+  %and6 = and i56 %add5, 4444132406286415 ; 0x0FC9EA3569584F (NOTE: not 0x0F0F0F0F0F0F0F)
+  %shr7 = lshr i56 %add5, 4
+  %and8 = and i56 %shr7, 4444132406286415
+  %add9 = add nuw nsw i56 %and8, %and6
+  %and10 = and i56 %add9, 18374686479671535 ; 0x4147AE147AE0EF (NOTE: not a 0x00FF splat)
+  %shr11 = lshr i56 %add9, 8
+  %and12 = and i56 %shr11, 18374686479671535
+  %add13 = add nuw nsw i56 %and12, %and10
+  %and14 = and i56 %add13, 72057589759737855 ; 0xFFFFFF00FFFFFF (NOTE: not a 0x0000FFFF splat)
+  %shr15 = lshr i56 %add13, 16
+  %and16 = and i56 %shr15, 72057589759737855
+  %add17 = add nuw nsw i56 %and16, %and14
+  %and18 = and i56 %add17, 4294967295 ; 0x00000000FFFFFFFF
+  %shr19 = lshr i56 %add17, 32
+  %add20 = add nuw nsw i56 %shr19, %and18
+  ret i56 %add20
+}

>From 35974f47215858098416cbbfbfc76905cdfe688c Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 31 Mar 2026 11:53:21 +0530
Subject: [PATCH 8/9] Implement the Splat review comments for efficient Mask
 calculation

---
 .../AggressiveInstCombine.cpp                 | 43 +++----------------
 1 file changed, 7 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 41a138974a4ee..a0d5f8ad87696 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -388,7 +388,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
 // uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);
 // uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);
 // uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);
-// uWord =  (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
+// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);
 // return  (uWord & 0x00000000FFFFFFFF) + (uWord>>32) & 0x00000000FFFFFFFF;
 // }
 static bool tryToRecognizePopCount1(Instruction &I) {
@@ -409,45 +409,16 @@ static bool tryToRecognizePopCount1(Instruction &I) {
 
   APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
   APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
-  APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
-  APInt Mask00FF;
-  if (Len == 16) {
-    Mask00FF = APInt(16, 0x00FF);
-  } else {
-    Mask00FF = APInt::getSplat(Len, APInt(16, 0x00FF));
-  }
-  APInt Mask0000FFFF;
-  if (Len <= 32) {
-    Mask0000FFFF = APInt(32, 0x0000FFFF);
-  } else {
-    Mask0000FFFF = APInt::getSplat(Len, APInt(32, 0x0000FFFF));
-  }
 
-  APInt Mask64 = APInt(64, 0x00000000FFFFFFFF);
   Value *ShiftOp;
   Value *Start = &I;
-  APInt Mask;
   for (unsigned I = Len; I >= 8; I = I / 2) {
-    switch (I) {
-    case 64:
-      Mask = Mask64;
-      break;
-    case 32:
-      Mask = Mask0000FFFF;
-      break;
-    case 16:
-      Mask = Mask00FF;
-      break;
-    case 8:
-      Mask = Mask0F;
-      break;
-    }
+    APInt Mask = APInt::getSplat(Len, APInt::getLowBitsSet(I, I / 2));
     // Matching "(uWord & Mask) + (uWord>>I/2)".
     // OR
     // Matching "(uWord & Mask) + ((uWord>>I/2) &
-    // 0x00000000FFFFFFFF)".
-    if (Len >= I &&
-        !(match(Start,
+    // Mask)".
+    if (!(match(Start,
                 m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
                               m_SpecificInt(Mask)),
                         m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
@@ -461,7 +432,7 @@ static bool tryToRecognizePopCount1(Instruction &I) {
   }
 
   ShiftOp = nullptr;
-  // Matching "uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333)".
+  // Matching "uWord = (uWord & Mask33) + ((uWord>>2) & Mask33)".
   if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
                                   m_SpecificInt(Mask33)),
                             m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask33)))))
@@ -469,8 +440,8 @@ static bool tryToRecognizePopCount1(Instruction &I) {
 
   Start = ShiftOp;
   ShiftOp = nullptr;
-  // Matching "uWord = (uWord & 0x55555555) + ((uWord>>1) &
-  // 0x55555555)".
+  // Matching "uWord = (uWord & Mask55) + ((uWord>>1) &
+  // Mask55)".
   if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
                                   m_SpecificInt(Mask55)),
                             m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))

>From 7e0eeaa77ee7955033d659c201876456848a2b57 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 1 Apr 2026 12:45:12 +0530
Subject: [PATCH 9/9] Implement more review comments

---
 .../AggressiveInstCombine.cpp                 |  46 +-
 .../AggressiveInstCombine/popcount.ll         | 399 +++++++++---------
 2 files changed, 219 insertions(+), 226 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index a0d5f8ad87696..885b4d3da2454 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -288,6 +288,16 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
   return true;
 }
 
+/// Replace every use of \p I with a ctpop intrinsic call on \p Root and bump
+/// NumPopCountRecognized. \p I itself is left in place (not erased here).
+static void replaceWithPopCount(Instruction &I, Value *Root) {
+  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+  IRBuilder<> Builder(&I);
+  I.replaceAllUsesWith(
+      Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
+  ++NumPopCountRecognized;
+}
+
 // Try to recognize below function as popcount intrinsic.
 // This is the "best" algorithm from
 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -357,11 +367,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
           };
 
           if (CheckAndMask()) {
-            LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
-            IRBuilder<> Builder(&I);
-            I.replaceAllUsesWith(
-                Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
-            ++NumPopCountRecognized;
+            replaceWithPopCount(I, Root);
             return true;
           }
         }
@@ -374,8 +380,6 @@ static bool tryToRecognizePopCount(Instruction &I) {
 
 // Try to recognize below function as popcount intrinsic.
 // Ref. Hacker Delights
-// Also used in TargetLowering::expandCTPOP().
-//
 // int popcount32(unsigned int i) {
 // uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);
 // uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);
@@ -418,20 +422,16 @@ static bool tryToRecognizePopCount1(Instruction &I) {
     // OR
     // Matching "(uWord & Mask) + ((uWord>>I/2) &
     // Mask)".
-    if (!(match(Start,
-                m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
-                              m_SpecificInt(Mask)),
-                        m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) ||
-          match(Start,
-                m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
-                        m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask))))))
+    if (!match(Start,
+               m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+                             m_SpecificInt(Mask)),
+                       m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))) &&
+        !match(Start, m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(I / 2)),
+                              m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask)))))
       return false;
-
     Start = ShiftOp;
-    ShiftOp = nullptr;
   }
 
-  ShiftOp = nullptr;
   // Matching "uWord = (uWord & Mask33) + ((uWord>>2) & Mask33)".
   if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
                                   m_SpecificInt(Mask33)),
@@ -439,19 +439,15 @@ static bool tryToRecognizePopCount1(Instruction &I) {
     return false;
 
   Start = ShiftOp;
-  ShiftOp = nullptr;
   // Matching "uWord = (uWord & Mask55) + ((uWord>>1) &
   // Mask55)".
-  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(1)),
+  Value *Root;
+  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(Root), m_SpecificInt(1)),
                                   m_SpecificInt(Mask55)),
-                            m_And(m_Deferred(ShiftOp), m_SpecificInt(Mask55)))))
+                            m_And(m_Deferred(Root), m_SpecificInt(Mask55)))))
     return false;
 
-  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
-  IRBuilder<> Builder(&I);
-  I.replaceAllUsesWith(
-      Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {ShiftOp}));
-  ++NumPopCountRecognized;
+  replaceWithPopCount(I, Root);
   return true;
 }
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 21167abbcef8f..9cdfdfaacb4bf 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -240,10 +240,9 @@ define i32 @popcount64_mask(i64 %x) {
   ret i32 %13
 }
 
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32(
-; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define i32 @popcnt1_32(i32 noundef %uWord) {
+; CHECK-LABEL: define i32 @popcnt1_32(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
 ; CHECK-NEXT:    ret i32 [[TMP0]]
@@ -252,29 +251,28 @@ entry:
   %and = and i32 %uWord, 1431655765
   %shr = lshr i32 %uWord, 1
   %and1 = and i32 %shr, 1431655765
-  %add = add nuw i32 %and1, %and
+  %add = add i32 %and1, %and
   %and2 = and i32 %add, 858993459
   %shr3 = lshr i32 %add, 2
   %and4 = and i32 %shr3, 858993459
-  %add5 = add nuw nsw i32 %and4, %and2
+  %add5 = add i32 %and4, %and2
   %and6 = and i32 %add5, 252645135
   %shr7 = lshr i32 %add5, 4
   %and8 = and i32 %shr7, 252645135
-  %add9 = add nuw nsw i32 %and8, %and6
+  %add9 = add i32 %and8, %and6
   %and10 = and i32 %add9, 16711935
   %shr11 = lshr i32 %add9, 8
   %and12 = and i32 %shr11, 16711935
-  %add13 = add nuw nsw i32 %and12, %and10
+  %add13 = add i32 %and12, %and10
   %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
-  %add16 = add nuw nsw i32 %and14, %shr15
+  %add16 = add i32 %and14, %shr15
   ret i32 %add16
 }
 
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(i32 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef range(i32 0, 59) i32 @popcnt1_32_variant2(
-; CHECK-SAME: i32 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define i32 @popcnt1_32_variant2(i32 noundef %uWord) {
+; CHECK-LABEL: define i32 @popcnt1_32_variant2(
+; CHECK-SAME: i32 noundef [[UWORD:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[UWORD]])
 ; CHECK-NEXT:    ret i32 [[TMP0]]
@@ -283,30 +281,29 @@ entry:
   %and = and i32 %uWord, 1431655765
   %shr = lshr i32 %uWord, 1
   %and1 = and i32 %shr, 1431655765
-  %add = add nuw i32 %and1, %and
+  %add = add i32 %and1, %and
   %and2 = and i32 %add, 858993459
   %shr3 = lshr i32 %add, 2
   %and4 = and i32 %shr3, 858993459
-  %add5 = add nuw nsw i32 %and4, %and2
+  %add5 = add i32 %and4, %and2
   %and6 = and i32 %add5, 252645135
   %shr7 = lshr i32 %add5, 4
   %and8 = and i32 %shr7, 252645135
-  %add9 = add nuw nsw i32 %and8, %and6
+  %add9 = add i32 %and8, %and6
   %and10 = and i32 %add9, 16711935
   %shr11 = lshr i32 %add9, 8
   %and12 = and i32 %shr11, 16711935
-  %add13 = add nuw nsw i32 %and12, %and10
+  %add13 = add i32 %and12, %and10
   %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
   %and16 = and i32 %shr15, 65535
-  %add17 = add nuw nsw i32 %and14, %and16
+  %add17 = add i32 %and14, %and16
   ret i32 %add17
 }
 
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define dso_local noundef i64 @popcnt1_64(i64 noundef %uWord) local_unnamed_addr {
-; CHECK-LABEL: define dso_local noundef i64 @popcnt1_64(
-; CHECK-SAME: i64 noundef [[UWORD:%.*]]) local_unnamed_addr {
+define  i64 @popcnt1_64(i64 noundef %uWord) {
+; CHECK-LABEL: define  i64 @popcnt1_64(
+; CHECK-SAME: i64 noundef [[UWORD:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[UWORD]])
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -315,26 +312,26 @@ entry:
   %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
   %shr = lshr i64 %uWord, 1
   %and1 = and i64 %shr, 6148914691236517205
-  %add = add nuw i64 %and1, %and
+  %add = add i64 %and1, %and
   %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
   %shr3 = lshr i64 %add, 2
   %and4 = and i64 %shr3, 3689348814741910323
-  %add5 = add nuw nsw i64 %and4, %and2
+  %add5 = add i64 %and4, %and2
   %and6 = and i64 %add5, 1085102592571150095 ; 0F0F0F0F0F0F0F0F
   %shr7 = lshr i64 %add5, 4
   %and8 = and i64 %shr7, 1085102592571150095
-  %add9 = add nuw nsw i64 %and8, %and6
+  %add9 = add i64 %and8, %and6
   %and10 = and i64 %add9, 71777214294589695 ; 00FF00FF00FF00FF
   %shr11 = lshr i64 %add9, 8
   %and12 = and i64 %shr11, 71777214294589695
-  %add13 = add nuw nsw i64 %and12, %and10
+  %add13 = add i64 %and12, %and10
   %and14 = and i64 %add13, 281470681808895 ; 0000FFFF0000FFFF
   %shr15 = lshr i64 %add13, 16
   %and16 = and i64 %shr15, 281470681808895
-  %add17 = add nuw nsw i64 %and16, %and14
+  %add17 = add i64 %and16, %and14
   %and18 = and i64 %add17, 4294967295 ; 00000000FFFFFFFF
   %shr19 = lshr i64 %add17, 32
-  %add20 = add nuw nsw i64 %and18, %shr19
+  %add20 = add i64 %and18, %shr19
   ret i64 %add20
 }
 
@@ -347,18 +344,18 @@ define i16 @popcnt1_16(i16 noundef %uWord) {
   %and = and i16 %uWord, 21845 ; 0x5555
   %shr = lshr i16 %uWord, 1
   %and1 = and i16 %shr, 21845
-  %add = add nuw i16 %and1, %and
+  %add = add i16 %and1, %and
   %and2 = and i16 %add, 13107 ; 0x3333
   %shr3 = lshr i16 %add, 2
   %and4 = and i16 %shr3, 13107
-  %add5 = add nuw nsw i16 %and4, %and2
+  %add5 = add i16 %and4, %and2
   %and6 = and i16 %add5, 3855 ; 0x0F0F
   %shr7 = lshr i16 %add5, 4
   %and8 = and i16 %shr7, 3855
-  %add9 = add nuw nsw i16 %and8, %and6
+  %add9 = add i16 %and8, %and6
   %and10 = and i16 %add9, 255 ; 0x00FF
   %shr11 = lshr i16 %add9, 8
-  %add13 = add nuw nsw i16 %shr11, %and10
+  %add13 = add i16 %shr11, %and10
   ret i16 %add13
 }
 
@@ -371,19 +368,19 @@ define i16 @popcnt1_16_variant2(i16 noundef %uWord) {
   %and = and i16 %uWord, 21845 ; 0x5555
   %shr = lshr i16 %uWord, 1
   %and1 = and i16 %shr, 21845
-  %add = add nuw i16 %and1, %and
+  %add = add i16 %and1, %and
   %and2 = and i16 %add, 13107 ; 0x3333
   %shr3 = lshr i16 %add, 2
   %and4 = and i16 %shr3, 13107
-  %add5 = add nuw nsw i16 %and4, %and2
+  %add5 = add i16 %and4, %and2
   %and6 = and i16 %add5, 3855 ; 0x0F0F
   %shr7 = lshr i16 %add5, 4
   %and8 = and i16 %shr7, 3855
-  %add9 = add nuw nsw i16 %and8, %and6
+  %add9 = add i16 %and8, %and6
   %and10 = and i16 %add9, 255 ; 0x00FF
   %shr11 = lshr i16 %add9, 8
   %and12 = and i16 %shr11, 255
-  %add13 = add nuw nsw i16 %and10, %and12
+  %add13 = add i16 %and10, %and12
   ret i16 %add13
 }
 
@@ -396,18 +393,18 @@ define <8 x i16> @popcnt1_16vec(<8 x i16> %uWord) {
   %and = and <8 x i16> %uWord, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
   %shr = lshr <8 x i16> %uWord, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %and1 = and <8 x i16> %shr, <i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845, i16 21845>
-  %add = add nuw <8 x i16> %and1, %and
+  %add = add <8 x i16> %and1, %and
   %and2 = and <8 x i16> %add, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
   %shr3 = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %and4 = and <8 x i16> %shr3, <i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107, i16 13107>
-  %add5 = add nuw nsw <8 x i16> %and4, %and2
+  %add5 = add <8 x i16> %and4, %and2
   %and6 = and <8 x i16> %add5, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
   %shr7 = lshr <8 x i16> %add5, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %and8 = and <8 x i16> %shr7, <i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855, i16 3855>
-  %add9 = add nuw nsw <8 x i16> %and8, %and6
+  %add9 = add <8 x i16> %and8, %and6
   %and10 = and <8 x i16> %add9, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
   %shr11 = lshr <8 x i16> %add9, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %add13 = add nuw nsw <8 x i16> %shr11, %and10
+  %add13 = add <8 x i16> %shr11, %and10
   ret <8 x i16> %add13
 }
 
@@ -420,22 +417,22 @@ define <4 x i32> @popcnt1_32vec(<4 x i32> %uWord) {
   %and = and <4 x i32> %uWord, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
   %shr = lshr <4 x i32> %uWord, <i32 1, i32 1, i32 1, i32 1>
   %and1 = and <4 x i32> %shr, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
-  %add = add nuw <4 x i32> %and1, %and
+  %add = add <4 x i32> %and1, %and
   %and2 = and <4 x i32> %add, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
   %shr3 = lshr <4 x i32> %add, <i32 2, i32 2, i32 2, i32 2>
   %and4 = and <4 x i32> %shr3, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
-  %add5 = add nuw nsw <4 x i32> %and4, %and2
+  %add5 = add <4 x i32> %and4, %and2
   %and6 = and <4 x i32> %add5, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
   %shr7 = lshr <4 x i32> %add5, <i32 4, i32 4, i32 4, i32 4>
   %and8 = and <4 x i32> %shr7, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
-  %add9 = add nuw nsw <4 x i32> %and8, %and6
+  %add9 = add <4 x i32> %and8, %and6
   %and10 = and <4 x i32> %add9, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
   %shr11 = lshr <4 x i32> %add9, <i32 8, i32 8, i32 8, i32 8>
   %and12 = and <4 x i32> %shr11, <i32 16711935, i32 16711935, i32 16711935, i32 16711935>
-  %add13 = add nuw nsw <4 x i32> %and12, %and10
+  %add13 = add <4 x i32> %and12, %and10
   %and14 = and <4 x i32> %add13, <i32 65535, i32 65535, i32 65535, i32 65535>
   %shr15 = lshr <4 x i32> %add13, <i32 16, i32 16, i32 16, i32 16>
-  %add16 = add nuw nsw <4 x i32> %and14, %shr15
+  %add16 = add <4 x i32> %and14, %shr15
   ret <4 x i32> %add16
 }
 
@@ -448,27 +445,27 @@ define i64 @popcnt1_64_variant2(i64 noundef %uWord) {
   %and = and i64 %uWord, 6148914691236517205 ; 0x5555555555555555
   %shr = lshr i64 %uWord, 1
   %and1 = and i64 %shr, 6148914691236517205
-  %add = add nuw i64 %and1, %and
+  %add = add i64 %and1, %and
   %and2 = and i64 %add, 3689348814741910323 ; 0x3333333333333333
   %shr3 = lshr i64 %add, 2
   %and4 = and i64 %shr3, 3689348814741910323
-  %add5 = add nuw nsw i64 %and4, %and2
+  %add5 = add i64 %and4, %and2
   %and6 = and i64 %add5, 1085102592571150095 ; 0x0F0F0F0F0F0F0F0F
   %shr7 = lshr i64 %add5, 4
   %and8 = and i64 %shr7, 1085102592571150095
-  %add9 = add nuw nsw i64 %and8, %and6
+  %add9 = add i64 %and8, %and6
   %and10 = and i64 %add9, 71777214294589695 ; 0x00FF00FF00FF00FF
   %shr11 = lshr i64 %add9, 8
   %and12 = and i64 %shr11, 71777214294589695
-  %add13 = add nuw nsw i64 %and12, %and10
+  %add13 = add i64 %and12, %and10
   %and14 = and i64 %add13, 281470681808895 ; 0x0000FFFF0000FFFF
   %shr15 = lshr i64 %add13, 16
   %and16 = and i64 %shr15, 281470681808895
-  %add17 = add nuw nsw i64 %and16, %and14
+  %add17 = add i64 %and16, %and14
   %and18 = and i64 %add17, 4294967295 ; 0x00000000FFFFFFFF
   %shr19 = lshr i64 %add17, 32
   %and20 = and i64 %shr19, 4294967295
-  %add21 = add nuw nsw i64 %and18, %and20
+  %add21 = add i64 %and18, %and20
   ret i64 %add21
 }
 
@@ -481,26 +478,26 @@ define <2 x i64> @popcnt1_64vec(<2 x i64> %uWord) {
   %and = and <2 x i64> %uWord, <i64 6148914691236517205, i64 6148914691236517205>
   %shr = lshr <2 x i64> %uWord, <i64 1, i64 1>
   %and1 = and <2 x i64> %shr, <i64 6148914691236517205, i64 6148914691236517205>
-  %add = add nuw <2 x i64> %and1, %and
+  %add = add <2 x i64> %and1, %and
   %and2 = and <2 x i64> %add, <i64 3689348814741910323, i64 3689348814741910323>
   %shr3 = lshr <2 x i64> %add, <i64 2, i64 2>
   %and4 = and <2 x i64> %shr3, <i64 3689348814741910323, i64 3689348814741910323>
-  %add5 = add nuw nsw <2 x i64> %and4, %and2
+  %add5 = add <2 x i64> %and4, %and2
   %and6 = and <2 x i64> %add5, <i64 1085102592571150095, i64 1085102592571150095>
   %shr7 = lshr <2 x i64> %add5, <i64 4, i64 4>
   %and8 = and <2 x i64> %shr7, <i64 1085102592571150095, i64 1085102592571150095>
-  %add9 = add nuw nsw <2 x i64> %and8, %and6
+  %add9 = add <2 x i64> %and8, %and6
   %and10 = and <2 x i64> %add9, <i64 71777214294589695, i64 71777214294589695>
   %shr11 = lshr <2 x i64> %add9, <i64 8, i64 8>
   %and12 = and <2 x i64> %shr11, <i64 71777214294589695, i64 71777214294589695>
-  %add13 = add nuw nsw <2 x i64> %and12, %and10
+  %add13 = add <2 x i64> %and12, %and10
   %and14 = and <2 x i64> %add13, <i64 281470681808895, i64 281470681808895>
   %shr15 = lshr <2 x i64> %add13, <i64 16, i64 16>
   %and16 = and <2 x i64> %shr15, <i64 281470681808895, i64 281470681808895>
-  %add17 = add nuw nsw <2 x i64> %and16, %and14
+  %add17 = add <2 x i64> %and16, %and14
   %and18 = and <2 x i64> %add17, <i64 4294967295, i64 4294967295>
   %shr19 = lshr <2 x i64> %add17, <i64 32, i64 32>
-  %add20 = add nuw nsw <2 x i64> %and18, %shr19
+  %add20 = add <2 x i64> %and18, %shr19
   ret <2 x i64> %add20
 }
 
@@ -512,29 +509,29 @@ define i8 @popcnt1_8_negative(i8 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i8 [[UWORD:%.*]], 85
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i8 [[SHR]], 85
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i8 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i8 [[ADD]], 51
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i8 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i8 [[SHR3]], 51
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i8 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i8 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i8 [[ADD5]], 15
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i8 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i8 [[SHR7]], 15
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i8 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i8 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    ret i8 [[ADD9]]
 ;
   %and = and i8 %uWord, 85 ; 0x55
   %shr = lshr i8 %uWord, 1
   %and1 = and i8 %shr, 85
-  %add = add nuw i8 %and1, %and
+  %add = add i8 %and1, %and
   %and2 = and i8 %add, 51 ; 0x33
   %shr3 = lshr i8 %add, 2
   %and4 = and i8 %shr3, 51
-  %add5 = add nuw nsw i8 %and4, %and2
+  %add5 = add i8 %and4, %and2
   %and6 = and i8 %add5, 15 ; 0x0F
   %shr7 = lshr i8 %add5, 4
   %and8 = and i8 %shr7, 15
-  %add9 = add nuw nsw i8 %and8, %and6
+  %add9 = add i8 %and8, %and6
   ret i8 %add9
 }
 
@@ -544,59 +541,59 @@ define i128 @popcnt1_128_negative(i128 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i128 [[UWORD:%.*]], 113427455640312821154458202477256070485
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i128 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i128 [[SHR]], 113427455640312821154458202477256070485
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i128 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i128 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i128 [[ADD]], 68056473384187692692674921486353642291
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i128 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i128 [[SHR3]], 68056473384187692692674921486353642291
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i128 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i128 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i128 [[ADD5]], 20016609818878733144904388672456953615
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i128 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i128 [[SHR7]], 20016609818878733144904388672456953615
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i128 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i128 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i128 [[ADD9]], 1334440654591915542993625911497130241
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i128 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i128 [[SHR11]], 1334440654591915542993625911497130241
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i128 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i128 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i128 [[ADD13]], 5192296858534827628530496329220095
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i128 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i128 [[SHR15]], 5192296858534827628530496329220095
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i128 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i128 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i128 [[ADD17]], 79228162514264337593543950335
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i128 [[ADD17]], 32
 ; CHECK-NEXT:    [[AND20:%.*]] = and i128 [[SHR19]], 79228162514264337593543950335
-; CHECK-NEXT:    [[ADD21:%.*]] = add nuw nsw i128 [[AND18]], [[AND20]]
+; CHECK-NEXT:    [[ADD21:%.*]] = add i128 [[AND18]], [[AND20]]
 ; CHECK-NEXT:    [[AND22:%.*]] = and i128 [[ADD21]], -1
 ; CHECK-NEXT:    [[SHR23:%.*]] = lshr i128 [[ADD21]], 64
-; CHECK-NEXT:    [[ADD24:%.*]] = add nuw nsw i128 [[AND22]], [[SHR23]]
+; CHECK-NEXT:    [[ADD24:%.*]] = add i128 [[AND22]], [[SHR23]]
 ; CHECK-NEXT:    ret i128 [[ADD24]]
 ;
   %and = and i128 %uWord, 113427455640312821154458202477256070485 ; 0x55555555555555555555555555555555
   %shr = lshr i128 %uWord, 1
   %and1 = and i128 %shr, 113427455640312821154458202477256070485
-  %add = add nuw i128 %and1, %and
+  %add = add i128 %and1, %and
   %and2 = and i128 %add, 68056473384187692692674921486353642291 ; 0x33333333333333333333333333333333
   %shr3 = lshr i128 %add, 2
   %and4 = and i128 %shr3, 68056473384187692692674921486353642291
-  %add5 = add nuw nsw i128 %and4, %and2
+  %add5 = add i128 %and4, %and2
   %and6 = and i128 %add5, 20016609818878733144904388672456953615 ; 0x0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
   %shr7 = lshr i128 %add5, 4
   %and8 = and i128 %shr7, 20016609818878733144904388672456953615
-  %add9 = add nuw nsw i128 %and8, %and6
+  %add9 = add i128 %and8, %and6
   %and10 = and i128 %add9, 1334440654591915542993625911497130241 ; 0x00FF00FF00FF00FF00FF00FF00FF00FF
   %shr11 = lshr i128 %add9, 8
   %and12 = and i128 %shr11, 1334440654591915542993625911497130241
-  %add13 = add nuw nsw i128 %and12, %and10
+  %add13 = add i128 %and12, %and10
   %and14 = and i128 %add13, 5192296858534827628530496329220095 ; 0x0000FFFF0000FFFF0000FFFF0000FFFF
   %shr15 = lshr i128 %add13, 16
   %and16 = and i128 %shr15, 5192296858534827628530496329220095
-  %add17 = add nuw nsw i128 %and16, %and14
+  %add17 = add i128 %and16, %and14
   %and18 = and i128 %add17, 79228162514264337593543950335 ; 0x00000000FFFFFFFF00000000FFFFFFFF
   %shr19 = lshr i128 %add17, 32
   %and20 = and i128 %shr19, 79228162514264337593543950335
-  %add21 = add nuw nsw i128 %and18, %and20
+  %add21 = add i128 %and18, %and20
   %and22 = and i128 %add21, 340282366920938463463374607431768211455 ; 0x0000000000000000FFFFFFFFFFFFFFFF
   %shr23 = lshr i128 %add21, 64
-  %add24 = add nuw nsw i128 %and22, %shr23
+  %add24 = add i128 %and22, %shr23
   ret i128 %add24
 }
 
@@ -606,18 +603,18 @@ define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
 ; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[UWORD:%.*]], 858993459
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[UWORD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i32 [[ADD5]], 252645135
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[SHR7]], 252645135
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i32 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD9]], 16711935
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
 ; CHECK-NEXT:    ret i32 [[ADD16]]
 ;
   ; Missing: %and = and i32 %uWord, 1431655765
@@ -628,18 +625,18 @@ define i32 @popcnt1_32_missing_step1(i32 noundef %uWord) {
   %and2 = and i32 %uWord, 858993459
   %shr3 = lshr i32 %uWord, 2
   %and4 = and i32 %shr3, 858993459
-  %add5 = add nuw nsw i32 %and4, %and2
+  %add5 = add i32 %and4, %and2
   %and6 = and i32 %add5, 252645135
   %shr7 = lshr i32 %add5, 4
   %and8 = and i32 %shr7, 252645135
-  %add9 = add nuw nsw i32 %and8, %and6
+  %add9 = add i32 %and8, %and6
   %and10 = and i32 %add9, 16711935
   %shr11 = lshr i32 %add9, 8
   %and12 = and i32 %shr11, 16711935
-  %add13 = add nuw nsw i32 %and12, %and10
+  %add13 = add i32 %and12, %and10
   %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
-  %add16 = add nuw nsw i32 %and14, %shr15
+  %add16 = add i32 %and14, %shr15
   ret i32 %add16
 }
 
@@ -649,28 +646,28 @@ define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[UWORD:%.*]], 1431655765
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[SHR]], 1431655765
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[ADD]], 858993459
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD5]], 16711935
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD5]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
 ; CHECK-NEXT:    ret i32 [[ADD16]]
 ;
   %and = and i32 %uWord, 1431655765
   %shr = lshr i32 %uWord, 1
   %and1 = and i32 %shr, 1431655765
-  %add = add nuw i32 %and1, %and
+  %add = add i32 %and1, %and
   %and2 = and i32 %add, 858993459
   %shr3 = lshr i32 %add, 2
   %and4 = and i32 %shr3, 858993459
-  %add5 = add nuw nsw i32 %and4, %and2
+  %add5 = add i32 %and4, %and2
   ; Missing: %and6 = and i32 %add5, 252645135
   ; Missing: %shr7 = lshr i32 %add5, 4
   ; Missing: %and8 = and i32 %shr7, 252645135
@@ -679,10 +676,10 @@ define i32 @popcnt1_32_missing_step3(i32 noundef %uWord) {
   %and10 = and i32 %add5, 16711935
   %shr11 = lshr i32 %add5, 8
   %and12 = and i32 %shr11, 16711935
-  %add13 = add nuw nsw i32 %and12, %and10
+  %add13 = add i32 %and12, %and10
   %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
-  %add16 = add nuw nsw i32 %and14, %shr15
+  %add16 = add i32 %and14, %shr15
   ret i32 %add16
 }
 
@@ -692,43 +689,43 @@ define i32 @popcnt1_32_wrong_mask(i32 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[UWORD:%.*]], 1145324612
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[SHR]], 1145324612
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[ADD]], 858993459
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i32 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[SHR3]], 858993459
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i32 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i32 [[ADD5]], 252645135
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i32 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[SHR7]], 252645135
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i32 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i32 [[ADD9]], 16711935
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i32 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[SHR11]], 16711935
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i32 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i32 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i32 [[ADD13]], 65535
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i32 [[ADD13]], 16
-; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i32 [[AND14]], [[SHR15]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add i32 [[AND14]], [[SHR15]]
 ; CHECK-NEXT:    ret i32 [[ADD16]]
 ;
   %and = and i32 %uWord, 1145324612 ; 0x44444444 instead of 0x55555555
   %shr = lshr i32 %uWord, 1
   %and1 = and i32 %shr, 1145324612
-  %add = add nuw i32 %and1, %and
+  %add = add i32 %and1, %and
   %and2 = and i32 %add, 858993459
   %shr3 = lshr i32 %add, 2
   %and4 = and i32 %shr3, 858993459
-  %add5 = add nuw nsw i32 %and4, %and2
+  %add5 = add i32 %and4, %and2
   %and6 = and i32 %add5, 252645135
   %shr7 = lshr i32 %add5, 4
   %and8 = and i32 %shr7, 252645135
-  %add9 = add nuw nsw i32 %and8, %and6
+  %add9 = add i32 %and8, %and6
   %and10 = and i32 %add9, 16711935
   %shr11 = lshr i32 %add9, 8
   %and12 = and i32 %shr11, 16711935
-  %add13 = add nuw nsw i32 %and12, %and10
+  %add13 = add i32 %and12, %and10
   %and14 = and i32 %add13, 65535
   %shr15 = lshr i32 %add13, 16
-  %add16 = add nuw nsw i32 %and14, %shr15
+  %add16 = add i32 %and14, %shr15
   ret i32 %add16
 }
 
@@ -738,22 +735,22 @@ define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
 ; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[UWORD:%.*]], 3689348814741910323
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[UWORD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i64 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
 ; CHECK-NEXT:    ret i64 [[ADD20]]
 ;
   ; Missing: %and = and i64 %uWord, 6148914691236517205
@@ -764,22 +761,22 @@ define i64 @popcnt1_64_missing_step1(i64 noundef %uWord) {
   %and2 = and i64 %uWord, 3689348814741910323
   %shr3 = lshr i64 %uWord, 2
   %and4 = and i64 %shr3, 3689348814741910323
-  %add5 = add nuw nsw i64 %and4, %and2
+  %add5 = add i64 %and4, %and2
   %and6 = and i64 %add5, 1085102592571150095
   %shr7 = lshr i64 %add5, 4
   %and8 = and i64 %shr7, 1085102592571150095
-  %add9 = add nuw nsw i64 %and8, %and6
+  %add9 = add i64 %and8, %and6
   %and10 = and i64 %add9, 71777214294589695
   %shr11 = lshr i64 %add9, 8
   %and12 = and i64 %shr11, 71777214294589695
-  %add13 = add nuw nsw i64 %and12, %and10
+  %add13 = add i64 %and12, %and10
   %and14 = and i64 %add13, 281470681808895
   %shr15 = lshr i64 %add13, 16
   %and16 = and i64 %shr15, 281470681808895
-  %add17 = add nuw nsw i64 %and16, %and14
+  %add17 = add i64 %and16, %and14
   %and18 = and i64 %add17, 4294967295
   %shr19 = lshr i64 %add17, 32
-  %add20 = add nuw nsw i64 %and18, %shr19
+  %add20 = add i64 %and18, %shr19
   ret i64 %add20
 }
 
@@ -789,32 +786,32 @@ define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[UWORD:%.*]], 6148914691236517205
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[SHR]], 6148914691236517205
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD5]], 71777214294589695
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD5]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
 ; CHECK-NEXT:    ret i64 [[ADD20]]
 ;
   %and = and i64 %uWord, 6148914691236517205
   %shr = lshr i64 %uWord, 1
   %and1 = and i64 %shr, 6148914691236517205
-  %add = add nuw i64 %and1, %and
+  %add = add i64 %and1, %and
   %and2 = and i64 %add, 3689348814741910323
   %shr3 = lshr i64 %add, 2
   %and4 = and i64 %shr3, 3689348814741910323
-  %add5 = add nuw nsw i64 %and4, %and2
+  %add5 = add i64 %and4, %and2
   ; Missing: %and6 = and i64 %add5, 1085102592571150095
   ; Missing: %shr7 = lshr i64 %add5, 4
   ; Missing: %and8 = and i64 %shr7, 1085102592571150095
@@ -823,14 +820,14 @@ define i64 @popcnt1_64_missing_step3(i64 noundef %uWord) {
   %and10 = and i64 %add5, 71777214294589695
   %shr11 = lshr i64 %add5, 8
   %and12 = and i64 %shr11, 71777214294589695
-  %add13 = add nuw nsw i64 %and12, %and10
+  %add13 = add i64 %and12, %and10
   %and14 = and i64 %add13, 281470681808895
   %shr15 = lshr i64 %add13, 16
   %and16 = and i64 %shr15, 281470681808895
-  %add17 = add nuw nsw i64 %and16, %and14
+  %add17 = add i64 %and16, %and14
   %and18 = and i64 %add17, 4294967295
   %shr19 = lshr i64 %add17, 32
-  %add20 = add nuw nsw i64 %and18, %shr19
+  %add20 = add i64 %and18, %shr19
   ret i64 %add20
 }
 
@@ -840,51 +837,51 @@ define i64 @popcnt1_64_wrong_mask(i64 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[UWORD:%.*]], 4919131752989213764
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[SHR]], 4919131752989213764
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i64 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i64 [[ADD]], 3689348814741910323
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[SHR3]], 3689348814741910323
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i64 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i64 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i64 [[ADD5]], 1085102592571150095
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i64 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i64 [[SHR7]], 1085102592571150095
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i64 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i64 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i64 [[ADD9]], 71777214294589695
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i64 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i64 [[SHR11]], 71777214294589695
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i64 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i64 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i64 [[ADD13]], 281470681808895
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i64 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i64 [[SHR15]], 281470681808895
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i64 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i64 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i64 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i64 [[AND18]], [[SHR19]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i64 [[AND18]], [[SHR19]]
 ; CHECK-NEXT:    ret i64 [[ADD20]]
 ;
   %and = and i64 %uWord, 4919131752989213764 ; 0x4444444444444444 instead of 0x5555555555555555
   %shr = lshr i64 %uWord, 1
   %and1 = and i64 %shr, 4919131752989213764
-  %add = add nuw i64 %and1, %and
+  %add = add i64 %and1, %and
   %and2 = and i64 %add, 3689348814741910323
   %shr3 = lshr i64 %add, 2
   %and4 = and i64 %shr3, 3689348814741910323
-  %add5 = add nuw nsw i64 %and4, %and2
+  %add5 = add i64 %and4, %and2
   %and6 = and i64 %add5, 1085102592571150095
   %shr7 = lshr i64 %add5, 4
   %and8 = and i64 %shr7, 1085102592571150095
-  %add9 = add nuw nsw i64 %and8, %and6
+  %add9 = add i64 %and8, %and6
   %and10 = and i64 %add9, 71777214294589695
   %shr11 = lshr i64 %add9, 8
   %and12 = and i64 %shr11, 71777214294589695
-  %add13 = add nuw nsw i64 %and12, %and10
+  %add13 = add i64 %and12, %and10
   %and14 = and i64 %add13, 281470681808895
   %shr15 = lshr i64 %add13, 16
   %and16 = and i64 %shr15, 281470681808895
-  %add17 = add nuw nsw i64 %and16, %and14
+  %add17 = add i64 %and16, %and14
   %and18 = and i64 %add17, 4294967295
   %shr19 = lshr i64 %add17, 32
-  %add20 = add nuw nsw i64 %and18, %shr19
+  %add20 = add i64 %and18, %shr19
   ret i64 %add20
 }
 
@@ -894,14 +891,14 @@ define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
 ; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[UWORD:%.*]], 13107
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[UWORD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i16 [[ADD5]], 3855
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i16 [[SHR7]], 3855
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i16 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD9]], 255
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
 ; CHECK-NEXT:    ret i16 [[ADD13]]
 ;
   ; Missing: %and = and i16 %uWord, 21845
@@ -912,14 +909,14 @@ define i16 @popcnt1_16_missing_step1(i16 noundef %uWord) {
   %and2 = and i16 %uWord, 13107
   %shr3 = lshr i16 %uWord, 2
   %and4 = and i16 %shr3, 13107
-  %add5 = add nuw nsw i16 %and4, %and2
+  %add5 = add i16 %and4, %and2
   %and6 = and i16 %add5, 3855
   %shr7 = lshr i16 %add5, 4
   %and8 = and i16 %shr7, 3855
-  %add9 = add nuw nsw i16 %and8, %and6
+  %add9 = add i16 %and8, %and6
   %and10 = and i16 %add9, 255
   %shr11 = lshr i16 %add9, 8
-  %add13 = add nuw nsw i16 %shr11, %and10
+  %add13 = add i16 %shr11, %and10
   ret i16 %add13
 }
 
@@ -929,24 +926,24 @@ define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i16 [[UWORD:%.*]], 21845
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[SHR]], 21845
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[ADD]], 13107
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD5]], 255
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD5]], 8
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
 ; CHECK-NEXT:    ret i16 [[ADD13]]
 ;
   %and = and i16 %uWord, 21845
   %shr = lshr i16 %uWord, 1
   %and1 = and i16 %shr, 21845
-  %add = add nuw i16 %and1, %and
+  %add = add i16 %and1, %and
   %and2 = and i16 %add, 13107
   %shr3 = lshr i16 %add, 2
   %and4 = and i16 %shr3, 13107
-  %add5 = add nuw nsw i16 %and4, %and2
+  %add5 = add i16 %and4, %and2
   ; Missing: %and6 = and i16 %add5, 3855
   ; Missing: %shr7 = lshr i16 %add5, 4
   ; Missing: %and8 = and i16 %shr7, 3855
@@ -954,7 +951,7 @@ define i16 @popcnt1_16_missing_step3(i16 noundef %uWord) {
   ; Jumping to step 4 (0x00FF mask)
   %and10 = and i16 %add5, 255
   %shr11 = lshr i16 %add5, 8
-  %add13 = add nuw nsw i16 %shr11, %and10
+  %add13 = add i16 %shr11, %and10
   ret i16 %add13
 }
 
@@ -964,35 +961,35 @@ define i16 @popcnt1_16_wrong_mask(i16 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i16 [[UWORD:%.*]], 17476
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[SHR]], 17476
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i16 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[ADD]], 13107
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i16 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i16 [[SHR3]], 13107
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i16 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i16 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i16 [[ADD5]], 3855
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i16 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i16 [[SHR7]], 3855
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i16 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i16 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i16 [[ADD9]], 255
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i16 [[ADD9]], 8
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i16 [[SHR11]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i16 [[SHR11]], [[AND10]]
 ; CHECK-NEXT:    ret i16 [[ADD13]]
 ;
   %and = and i16 %uWord, 17476 ; 0x4444 instead of 0x5555
   %shr = lshr i16 %uWord, 1
   %and1 = and i16 %shr, 17476
-  %add = add nuw i16 %and1, %and
+  %add = add i16 %and1, %and
   %and2 = and i16 %add, 13107
   %shr3 = lshr i16 %add, 2
   %and4 = and i16 %shr3, 13107
-  %add5 = add nuw nsw i16 %and4, %and2
+  %add5 = add i16 %and4, %and2
   %and6 = and i16 %add5, 3855
   %shr7 = lshr i16 %add5, 4
   %and8 = and i16 %shr7, 3855
-  %add9 = add nuw nsw i16 %and8, %and6
+  %add9 = add i16 %and8, %and6
   %and10 = and i16 %add9, 255
   %shr11 = lshr i16 %add9, 8
-  %add13 = add nuw nsw i16 %shr11, %and10
+  %add13 = add i16 %shr11, %and10
   ret i16 %add13
 }
 
@@ -1002,43 +999,43 @@ define i24 @popcnt1_negative_i24(i24 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i24 [[UWORD:%.*]], 5592405
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i24 [[SHR]], 5592405
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i24 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i24 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i24 [[ADD]], 3355443
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i24 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i24 [[SHR3]], 3355443
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i24 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i24 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i24 [[ADD5]], 986895
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i24 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i24 [[SHR7]], 986895
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i24 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i24 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i24 [[ADD9]], 65535
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i24 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i24 [[SHR11]], 65535
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i24 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i24 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i24 [[ADD13]], 255
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i24 [[ADD13]], 16
-; CHECK-NEXT:    [[ADD16:%.*]] = add nuw nsw i24 [[SHR15]], [[AND14]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add i24 [[SHR15]], [[AND14]]
 ; CHECK-NEXT:    ret i24 [[ADD16]]
 ;
   %and = and i24 %uWord, 5592405 ; 0x555555
   %shr = lshr i24 %uWord, 1
   %and1 = and i24 %shr, 5592405
-  %add = add nuw i24 %and1, %and
+  %add = add i24 %and1, %and
   %and2 = and i24 %add, 3355443 ; 0x333333
   %shr3 = lshr i24 %add, 2
   %and4 = and i24 %shr3, 3355443
-  %add5 = add nuw nsw i24 %and4, %and2
+  %add5 = add i24 %and4, %and2
   %and6 = and i24 %add5, 986895 ; 0x0F0F0F
   %shr7 = lshr i24 %add5, 4
   %and8 = and i24 %shr7, 986895
-  %add9 = add nuw nsw i24 %and8, %and6
+  %add9 = add i24 %and8, %and6
   %and10 = and i24 %add9, 65535 ; 0x00FFFF
   %shr11 = lshr i24 %add9, 8
   %and12 = and i24 %shr11, 65535
-  %add13 = add nuw nsw i24 %and12, %and10
+  %add13 = add i24 %and12, %and10
   %and14 = and i24 %add13, 255 ; 0x0000FF
   %shr15 = lshr i24 %add13, 16
-  %add16 = add nuw nsw i24 %shr15, %and14
+  %add16 = add i24 %shr15, %and14
   ret i24 %add16
 }
 
@@ -1048,51 +1045,51 @@ define i40 @popcnt1_negative_i40(i40 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i40 [[UWORD:%.*]], 366503875925
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i40 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i40 [[SHR]], 366503875925
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i40 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i40 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i40 [[ADD]], 219902325555
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i40 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i40 [[SHR3]], 219902325555
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i40 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i40 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i40 [[ADD5]], 67818775935
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i40 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i40 [[SHR7]], 67818775935
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i40 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i40 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i40 [[ADD9]], 280371153695
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i40 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i40 [[SHR11]], 280371153695
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i40 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i40 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i40 [[ADD13]], -1
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i40 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i40 [[SHR15]], -1
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i40 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i40 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i40 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i40 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i40 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i40 [[SHR19]], [[AND18]]
 ; CHECK-NEXT:    ret i40 [[ADD20]]
 ;
   %and = and i40 %uWord, 366503875925 ; 0x5555555555
   %shr = lshr i40 %uWord, 1
   %and1 = and i40 %shr, 366503875925
-  %add = add nuw i40 %and1, %and
+  %add = add i40 %and1, %and
   %and2 = and i40 %add, 219902325555 ; 0x3333333333
   %shr3 = lshr i40 %add, 2
   %and4 = and i40 %shr3, 219902325555
-  %add5 = add nuw nsw i40 %and4, %and2
+  %add5 = add i40 %and4, %and2
   %and6 = and i40 %add5, 67818775935 ; 0x0FCA50657F, not 0x0F0F0F0F0F (which is 64677154575)
   %shr7 = lshr i40 %add5, 4
   %and8 = and i40 %shr7, 67818775935
-  %add9 = add nuw nsw i40 %and8, %and6
+  %add9 = add i40 %and8, %and6
   %and10 = and i40 %add9, 280371153695 ; 0x41476C4B1F, not the 0xFF00FF00FF byte mask (which is 1095233372415)
   %shr11 = lshr i40 %add9, 8
   %and12 = and i40 %shr11, 280371153695
-  %add13 = add nuw nsw i40 %and12, %and10
+  %add13 = add i40 %and12, %and10
   %and14 = and i40 %add13, 1099511627775 ; 0xFFFFFFFFFF (all ones in i40; printed as -1 in the CHECK lines)
   %shr15 = lshr i40 %add13, 16
   %and16 = and i40 %shr15, 1099511627775
-  %add17 = add nuw nsw i40 %and16, %and14
+  %add17 = add i40 %and16, %and14
   %and18 = and i40 %add17, 4294967295 ; 0x00000000FFFFFFFF
   %shr19 = lshr i40 %add17, 32
-  %add20 = add nuw nsw i40 %shr19, %and18
+  %add20 = add i40 %shr19, %and18
   ret i40 %add20
 }
 
@@ -1102,51 +1099,51 @@ define i48 @popcnt1_negative_i48(i48 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i48 [[UWORD:%.*]], 93824992236885
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i48 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i48 [[SHR]], 93824992236885
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i48 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i48 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i48 [[ADD]], 56294995342131
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i48 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i48 [[SHR3]], 56294995342131
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i48 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i48 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i48 [[ADD5]], 17361641481615
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i48 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i48 [[SHR7]], 17361641481615
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i48 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i48 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i48 [[ADD9]], 71777214294895
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i48 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i48 [[SHR11]], 71777214294895
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i48 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i48 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i48 [[ADD13]], -4294967041
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i48 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i48 [[SHR15]], -4294967041
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i48 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i48 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i48 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i48 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i48 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i48 [[SHR19]], [[AND18]]
 ; CHECK-NEXT:    ret i48 [[ADD20]]
 ;
   %and = and i48 %uWord, 93824992236885 ; 0x555555555555
   %shr = lshr i48 %uWord, 1
   %and1 = and i48 %shr, 93824992236885
-  %add = add nuw i48 %and1, %and
+  %add = add i48 %and1, %and
   %and2 = and i48 %add, 56294995342131 ; 0x333333333333
   %shr3 = lshr i48 %add, 2
   %and4 = and i48 %shr3, 56294995342131
-  %add5 = add nuw nsw i48 %and4, %and2
+  %add5 = add i48 %and4, %and2
   %and6 = and i48 %add5, 17361641481615 ; 0x0FCA5279258F, not 0x0F0F0F0F0F0F (which is 16557351571215)
   %shr7 = lshr i48 %add5, 4
   %and8 = and i48 %shr7, 17361641481615
-  %add9 = add nuw nsw i48 %and8, %and6
+  %add9 = add i48 %and8, %and6
   %and10 = and i48 %add9, 71777214294895 ; 0x4147EF5C6B6F, not 0x00FF00FF00FF (which is 1095233372415)
   %shr11 = lshr i48 %add9, 8
   %and12 = and i48 %shr11, 71777214294895
-  %add13 = add nuw nsw i48 %and12, %and10
+  %add13 = add i48 %and12, %and10
   %and14 = and i48 %add13, 281470681743615 ; 0xFFFF000000FF (printed as -4294967041 in the CHECK lines)
   %shr15 = lshr i48 %add13, 16
   %and16 = and i48 %shr15, 281470681743615
-  %add17 = add nuw nsw i48 %and16, %and14
+  %add17 = add i48 %and16, %and14
   %and18 = and i48 %add17, 4294967295 ; 0x00000000FFFFFFFF
   %shr19 = lshr i48 %add17, 32
-  %add20 = add nuw nsw i48 %shr19, %and18
+  %add20 = add i48 %shr19, %and18
   ret i48 %add20
 }
 
@@ -1156,50 +1153,50 @@ define i56 @popcnt1_negative_i56(i56 noundef %uWord) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i56 [[UWORD:%.*]], 24019198012642645
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i56 [[UWORD]], 1
 ; CHECK-NEXT:    [[AND1:%.*]] = and i56 [[SHR]], 24019198012642645
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw i56 [[AND1]], [[AND]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i56 [[AND1]], [[AND]]
 ; CHECK-NEXT:    [[AND2:%.*]] = and i56 [[ADD]], 14411518807585587
 ; CHECK-NEXT:    [[SHR3:%.*]] = lshr i56 [[ADD]], 2
 ; CHECK-NEXT:    [[AND4:%.*]] = and i56 [[SHR3]], 14411518807585587
-; CHECK-NEXT:    [[ADD5:%.*]] = add nuw nsw i56 [[AND4]], [[AND2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add i56 [[AND4]], [[AND2]]
 ; CHECK-NEXT:    [[AND6:%.*]] = and i56 [[ADD5]], 4444132406286415
 ; CHECK-NEXT:    [[SHR7:%.*]] = lshr i56 [[ADD5]], 4
 ; CHECK-NEXT:    [[AND8:%.*]] = and i56 [[SHR7]], 4444132406286415
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i56 [[AND8]], [[AND6]]
+; CHECK-NEXT:    [[ADD9:%.*]] = add i56 [[AND8]], [[AND6]]
 ; CHECK-NEXT:    [[AND10:%.*]] = and i56 [[ADD9]], 18374686479671535
 ; CHECK-NEXT:    [[SHR11:%.*]] = lshr i56 [[ADD9]], 8
 ; CHECK-NEXT:    [[AND12:%.*]] = and i56 [[SHR11]], 18374686479671535
-; CHECK-NEXT:    [[ADD13:%.*]] = add nuw nsw i56 [[AND12]], [[AND10]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add i56 [[AND12]], [[AND10]]
 ; CHECK-NEXT:    [[AND14:%.*]] = and i56 [[ADD13]], -4278190081
 ; CHECK-NEXT:    [[SHR15:%.*]] = lshr i56 [[ADD13]], 16
 ; CHECK-NEXT:    [[AND16:%.*]] = and i56 [[SHR15]], -4278190081
-; CHECK-NEXT:    [[ADD17:%.*]] = add nuw nsw i56 [[AND16]], [[AND14]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add i56 [[AND16]], [[AND14]]
 ; CHECK-NEXT:    [[AND18:%.*]] = and i56 [[ADD17]], 4294967295
 ; CHECK-NEXT:    [[SHR19:%.*]] = lshr i56 [[ADD17]], 32
-; CHECK-NEXT:    [[ADD20:%.*]] = add nuw nsw i56 [[SHR19]], [[AND18]]
+; CHECK-NEXT:    [[ADD20:%.*]] = add i56 [[SHR19]], [[AND18]]
 ; CHECK-NEXT:    ret i56 [[ADD20]]
 ;
   %and = and i56 %uWord, 24019198012642645 ; 0x55555555555555
   %shr = lshr i56 %uWord, 1
   %and1 = and i56 %shr, 24019198012642645
-  %add = add nuw i56 %and1, %and
+  %add = add i56 %and1, %and
   %and2 = and i56 %add, 14411518807585587 ; 0x33333333333333
   %shr3 = lshr i56 %add, 2
   %and4 = and i56 %shr3, 14411518807585587
-  %add5 = add nuw nsw i56 %and4, %and2
+  %add5 = add i56 %and4, %and2
   %and6 = and i56 %add5, 4444132406286415 ; 0x0FC9EA3569584F, not 0x0F0F0F0F0F0F0F (which is 4238682002231055)
   %shr7 = lshr i56 %add5, 4
   %and8 = and i56 %shr7, 4444132406286415
-  %add9 = add nuw nsw i56 %and8, %and6
+  %add9 = add i56 %and8, %and6
   %and10 = and i56 %add9, 18374686479671535 ; 0x4147AE147AE0EF, not 0x00FF00FF00FF00FF (which is 71777214294589695)
   %shr11 = lshr i56 %add9, 8
   %and12 = and i56 %shr11, 18374686479671535
-  %add13 = add nuw nsw i56 %and12, %and10
+  %add13 = add i56 %and12, %and10
   %and14 = and i56 %add13, 72057589759737855 ; 0xFFFFFF00FFFFFF (printed as -4278190081 in the CHECK lines)
   %shr15 = lshr i56 %add13, 16
   %and16 = and i56 %shr15, 72057589759737855
-  %add17 = add nuw nsw i56 %and16, %and14
+  %add17 = add i56 %and16, %and14
   %and18 = and i56 %add17, 4294967295 ; 0x00000000FFFFFFFF
   %shr19 = lshr i56 %add17, 32
-  %add20 = add nuw nsw i56 %shr19, %and18
+  %add20 = add i56 %shr19, %and18
   ret i56 %add20
 }



More information about the llvm-commits mailing list