[clang] [llvm] Match bitsin(typeof(x)) - popcnt(x) to s_bcnt0_i32 on AMDGPU (PR #164847)
Patrick Simmons via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 27 11:57:24 PDT 2025
================
@@ -1977,6 +1989,37 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
return insertValues(Builder, FractArg->getType(), ResultVals);
}
+bool AMDGPUCodeGenPrepareImpl::visitCtpop(IntrinsicInst &I) {
+ uint32_t BitWidth, DestinationWidth, IntrinsicWidth;
+ if (!I.hasOneUse() || !I.getType()->isIntegerTy() ||
+ !ST.hasBCNT(BitWidth = I.getType()->getIntegerBitWidth()))
+ return false;
+
+ BinaryOperator *MustBeSub = dyn_cast<BinaryOperator>(I.user_back());
+ if (!MustBeSub || MustBeSub->getOpcode() != BinaryOperator::Sub)
+ return false;
+
+ ConstantInt *FirstOperand = dyn_cast<ConstantInt>(MustBeSub->getOperand(0));
+ if (!FirstOperand || FirstOperand->getZExtValue() != BitWidth)
+ return false;
+
+ IRBuilder<> Builder(MustBeSub);
+ Instruction *TransformedIns =
+ Builder.CreateIntrinsic(BitWidth > 32 ? Intrinsic::amdgcn_bcnt64_lo
+ : Intrinsic::amdgcn_bcnt32_lo,
+ {}, {I.getArgOperand(0)});
+
+ if ((DestinationWidth = MustBeSub->getType()->getIntegerBitWidth()) !=
+ (IntrinsicWidth = TransformedIns->getType()->getIntegerBitWidth()))
+ TransformedIns = cast<Instruction>(Builder.CreateZExtOrTrunc(
----------------
linuxrocks123 wrote:
That's true, but the result of `CreateZExtOrTrunc` should always be an `Instruction` here, so I like keeping the `cast<Instruction>` for the internal assertion it provides.
https://github.com/llvm/llvm-project/pull/164847
More information about the cfe-commits mailing list