[llvm] users/goldsteinn/ctpop of not (PR #77859)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 11 17:07:00 PST 2024


https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/77859

- Add tests for folding `(add/sub/disjoint_or/icmp C, (ctpop (not x)))`; NFC
- Add folds for `(add/sub/disjoint_or/icmp C, (ctpop (not x)))`


>From af467a125a8640591ae76e4980c84b0750fee85b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 11 Jan 2024 15:41:06 -0800
Subject: [PATCH 1/2] Add tests for folding `(add/sub/disjoint_or/icmp C,
 (ctpop (not x)))`; NFC

---
 .../InstCombine/fold-ctpop-of-not.ll          | 174 ++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll

diff --git a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
new file mode 100644
index 00000000000000..16cfb5aad3f903
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i8 @llvm.ctpop.i8(i8)
+declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
+
+define i8 @fold_sub_c_ctpop(i8 %x) {
+; CHECK-LABEL: @fold_sub_c_ctpop(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw i8 12, [[CNT]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = sub i8 12, %cnt
+  ret i8 %r
+}
+
+define i8 @fold_sub_var_ctpop_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_sub_var_ctpop_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y:%.*]], [[CNT]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = sub i8 %y, %cnt
+  ret i8 %r
+}
+
+define <2 x i8> @fold_sub_ctpop_c(<2 x i8> %x) {
+; CHECK-LABEL: @fold_sub_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[CNT]], <i8 -63, i8 -64>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+  %cnt = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %nx)
+  %r = sub <2 x i8> %cnt, <i8 63, i8 64>
+  ret <2 x i8> %r
+}
+
+define i8 @fold_add_ctpop_c(i8 %x) {
+; CHECK-LABEL: @fold_add_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i8 [[CNT]], 63
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = add i8 %cnt, 63
+  ret i8 %r
+}
+
+define i8 @fold_distjoint_or_ctpop_c(i8 %x) {
+; CHECK-LABEL: @fold_distjoint_or_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[CNT]], 64
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = or i8 %cnt, 64
+  ret i8 %r
+}
+
+define i8 @fold_or_ctpop_c_fail(i8 %x) {
+; CHECK-LABEL: @fold_or_ctpop_c_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[CNT]], 65
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = or i8 %cnt, 65
+  ret i8 %r
+}
+
+define i8 @fold_add_ctpop_var_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_add_ctpop_var_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[CNT]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = add i8 %cnt, %y
+  ret i8 %r
+}
+
+define i1 @fold_cmp_eq_ctpop_c(i8 %x) {
+; CHECK-LABEL: @fold_cmp_eq_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[CNT]], 2
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = icmp eq i8 %cnt, 2
+  ret i1 %r
+}
+
+define <2 x i1> @fold_cmp_ne_ctpop_c(<2 x i8> %x) {
+; CHECK-LABEL: @fold_cmp_ne_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[CNT]], <i8 44, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+  %cnt = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %nx)
+  %r = icmp ne <2 x i8> %cnt, <i8 44, i8 3>
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @fold_cmp_ne_ctpop_var_fail(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @fold_cmp_ne_ctpop_var_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[CNT]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+  %cnt = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %nx)
+  %r = icmp ne <2 x i8> %cnt, %y
+  ret <2 x i1> %r
+}
+
+define i1 @fold_cmp_ult_ctpop_c(i8 %x) {
+; CHECK-LABEL: @fold_cmp_ult_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ult i8 [[CNT]], 5
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %nx = xor i8 %x, -1
+  %cnt = call i8 @llvm.ctpop.i8(i8 %nx)
+  %r = icmp ult i8 %cnt, 5
+  ret i1 %r
+}
+
+define <2 x i1> @fold_cmp_ugt_ctpop_c(<2 x i8> %x) {
+; CHECK-LABEL: @fold_cmp_ugt_ctpop_c(
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[CNT]], <i8 8, i8 6>
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+  %cnt = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %nx)
+  %r = icmp ugt <2 x i8> %cnt, <i8 8, i8 6>
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @fold_cmp_ugt_ctpop_c_out_of_range_fail(<2 x i8> %x) {
+; CHECK-LABEL: @fold_cmp_ugt_ctpop_c_out_of_range_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[CNT]], <i8 2, i8 10>
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+  %cnt = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %nx)
+  %r = icmp ugt <2 x i8> %cnt, <i8 2, i8 10>
+  ret <2 x i1> %r
+}

>From 31e53862a171e7ddbfd55d770ada5854745ddf1b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 11 Jan 2024 15:41:10 -0800
Subject: [PATCH 2/2] Add folds for `(add/sub/disjoint_or/icmp C, (ctpop (not
 x)))`

`(ctpop (not x))` <-> `(sub nuw nsw BitWidth(x), (ctpop x))`. The
`sub` expression can sometimes be constant folded depending on the use
case of `(ctpop (not x))`.

This patch adds fold for the following cases:

`(add/sub/disjoint_or C, (ctpop (not x)))`
    -> `(add/sub/disjoint_or C', (ctpop x))`
`(cmp pred C, (ctpop (not x)))`
    -> `(cmp swapped_pred C', (ctpop x))`

Where `C'` depends on how we constant fold `C` with `BitWidth(x)` for
the given opcode.

Proofs: https://alive2.llvm.org/ce/z/qUgfF3
---
 .../InstCombine/InstCombineAddSub.cpp         |  6 ++
 .../InstCombine/InstCombineAndOrXor.cpp       |  3 +
 .../InstCombine/InstCombineCompares.cpp       |  3 +
 .../InstCombine/InstCombineInternal.h         |  4 +
 .../InstCombine/InstructionCombining.cpp      | 77 +++++++++++++++++++
 .../InstCombine/fold-ctpop-of-not.ll          | 40 ++++------
 6 files changed, 109 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c7e6f32c5406a6..8a00b75a1f7404 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1683,6 +1683,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     }
   }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
@@ -2445,6 +2448,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     }
   }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   if (Instruction *R = foldSubOfMinMax(I, Builder))
     return R;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0620752e321394..de06fb8badf817 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3398,6 +3398,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *R = foldBinOpShiftWithShift(I))
     return R;
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c1aff445524de..8c0fd662255130 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1323,6 +1323,9 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
       return replaceInstUsesWith(Cmp, NewPhi);
     }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&Cmp))
+    return R;
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 21c61bd990184d..c24b6e3a5b33c0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -505,6 +505,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
                                         Value *RHS);
 
+  // If `I` has operand `(ctpop (not x))`, fold `I` with `(sub nuw nsw
+  // BitWidth(x), (ctpop x))`.
+  Instruction *tryFoldInstWithCtpopWithNot(Instruction *I);
+
   // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
   //    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
   // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f2018b3a19958..732ab7ad8b3223 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -740,6 +740,83 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
   return RetVal;
 }
 
+// If `I` has one Const operand and the other matches `(ctpop (not x))`,
+// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
+// This is only useful if the new subtract can fold, so we only handle the
+// following cases:
+//    1) (add/sub/disjoint_or C, (ctpop (not x)))
+//        -> (add/sub/disjoint_or C', (ctpop x))
+//    2) (cmp pred C, (ctpop (not x)))
+//        -> (cmp pred C', (ctpop x))
+Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
+  unsigned Opc = I->getOpcode();
+  unsigned ConstIdx = 1;
+  switch (Opc) {
+  default:
+    return nullptr;
+    // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
+    // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
+    // is constant.
+  case Instruction::Sub:
+    ConstIdx = 0;
+    break;
+  case Instruction::Or:
+    if (!match(I, m_DisjointOr(m_Value(), m_Value())))
+      return nullptr;
+    [[fallthrough]];
+  case Instruction::Add:
+  case Instruction::ICmp:
+    break;
+  }
+  // Find ctpop.
+  auto *Ctpop = dyn_cast<IntrinsicInst>(I->getOperand(1 - ConstIdx));
+  if (Ctpop == nullptr)
+    return nullptr;
+  if (Ctpop->getIntrinsicID() != Intrinsic::ctpop)
+    return nullptr;
+  Constant *C;
+  // Check other operand is ImmConstant.
+  if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
+    return nullptr;
+
+  Type *Ty = Ctpop->getType();
+  Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
+  // Need extra check for icmp. Note if this check fails it generally means the
+  // icmp will simplify to true/false.
+  if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality() &&
+      !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue())
+    return nullptr;
+
+  Value *Op = Ctpop->getArgOperand(0);
+  // Check we can invert `(not x)` for free.
+  Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
+  if (NotOp == nullptr)
+    return nullptr;
+  Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
+
+  Value *R = nullptr;
+
+  // Do the transformation here to avoid potentially introducing an infinite
+  // loop.
+  switch (Opc) {
+  case Instruction::Sub:
+    R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
+    break;
+  case Instruction::Or:
+  case Instruction::Add:
+    R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
+    break;
+  case Instruction::ICmp:
+    R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
+                           CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
+    break;
+  default:
+    llvm_unreachable("Unhandled Opcode");
+  }
+  assert(R != nullptr);
+  return replaceInstUsesWith(*I, R);
+}
+
 // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
 //   IFF
 //    1) the logic_shifts match
diff --git a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
index 16cfb5aad3f903..9fa3bb66bb7f10 100644
--- a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
+++ b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
@@ -6,9 +6,8 @@ declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
 
 define i8 @fold_sub_c_ctpop(i8 %x) {
 ; CHECK-LABEL: @fold_sub_c_ctpop(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw i8 12, [[CNT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i8 [[TMP1]], 4
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %nx = xor i8 %x, -1
@@ -32,9 +31,8 @@ define i8 @fold_sub_var_ctpop_fail(i8 %x, i8 %y) {
 
 define <2 x i8> @fold_sub_ctpop_c(<2 x i8> %x) {
 ; CHECK-LABEL: @fold_sub_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[CNT]], <i8 -63, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw <2 x i8> <i8 -55, i8 -56>, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
@@ -45,9 +43,8 @@ define <2 x i8> @fold_sub_ctpop_c(<2 x i8> %x) {
 
 define i8 @fold_add_ctpop_c(i8 %x) {
 ; CHECK-LABEL: @fold_add_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i8 [[CNT]], 63
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw i8 71, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %nx = xor i8 %x, -1
@@ -58,9 +55,8 @@ define i8 @fold_add_ctpop_c(i8 %x) {
 
 define i8 @fold_distjoint_or_ctpop_c(i8 %x) {
 ; CHECK-LABEL: @fold_distjoint_or_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[CNT]], 64
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw i8 72, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %nx = xor i8 %x, -1
@@ -97,9 +93,8 @@ define i8 @fold_add_ctpop_var_fail(i8 %x, i8 %y) {
 
 define i1 @fold_cmp_eq_ctpop_c(i8 %x) {
 ; CHECK-LABEL: @fold_cmp_eq_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[CNT]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], 6
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %nx = xor i8 %x, -1
@@ -110,9 +105,8 @@ define i1 @fold_cmp_eq_ctpop_c(i8 %x) {
 
 define <2 x i1> @fold_cmp_ne_ctpop_c(<2 x i8> %x) {
 ; CHECK-LABEL: @fold_cmp_ne_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[CNT]], <i8 44, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 -36, i8 5>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
@@ -136,9 +130,8 @@ define <2 x i1> @fold_cmp_ne_ctpop_var_fail(<2 x i8> %x, <2 x i8> %y) {
 
 define i1 @fold_cmp_ult_ctpop_c(i8 %x) {
 ; CHECK-LABEL: @fold_cmp_ult_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i8 [[CNT]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[TMP1]], 3
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %nx = xor i8 %x, -1
@@ -149,9 +142,8 @@ define i1 @fold_cmp_ult_ctpop_c(i8 %x) {
 
 define <2 x i1> @fold_cmp_ugt_ctpop_c(<2 x i8> %x) {
 ; CHECK-LABEL: @fold_cmp_ugt_ctpop_c(
-; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]), !range [[RNG0]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[CNT]], <i8 8, i8 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]), !range [[RNG0]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 0, i8 2>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %nx = xor <2 x i8> %x, <i8 -1, i8 -1>



More information about the llvm-commits mailing list