[llvm] [LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall (PR #126736)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 20:14:26 PST 2025
================
@@ -1064,53 +1073,95 @@ bool LoopIdiomRecognize::processLoopStridedStore(
return Changed;
// Okay, everything looks good, insert the memset.
+ // MemsetArg is the number of bytes for the memset libcall, and the number
+ // of pattern repetitions if the memset.pattern intrinsic is being used.
+ Value *MemsetArg;
+ std::optional<int64_t> BytesWritten = std::nullopt;
+
+ if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
+ const SCEV *TripCountS =
+ SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
+ if (!Expander.isSafeToExpand(TripCountS))
+ return Changed;
+ const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+ if (!ConstStoreSize)
+ return Changed;
+ Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
+ Preheader->getTerminator());
+ uint64_t PatternRepsPerTrip =
+ (ConstStoreSize->getValue()->getZExtValue() * 8) /
+ DL->getTypeSizeInBits(PatternValue->getType());
+ // If ConstStoreSize is not equal to the width of PatternValue, then
+ // MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
+ // MemsetArg is just TripCount.
+ MemsetArg =
+ PatternRepsPerTrip == 1
+ ? TripCount
+ : Builder.CreateMul(TripCount,
+ Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
+ PatternRepsPerTrip));
+ if (auto CI = dyn_cast<ConstantInt>(TripCount))
+ BytesWritten =
+ CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
+ } else {
+ const SCEV *NumBytesS =
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
- const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
-
- // TODO: ideally we should still be able to generate memset if SCEV expander
- // is taught to generate the dependencies at the latest point.
- if (!Expander.isSafeToExpand(NumBytesS))
- return Changed;
-
- Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ // TODO: ideally we should still be able to generate memset if SCEV expander
+ // is taught to generate the dependencies at the latest point.
+ if (!Expander.isSafeToExpand(NumBytesS))
+ return Changed;
+ MemsetArg =
+ Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ if (auto CI = dyn_cast<ConstantInt>(MemsetArg))
----------------
topperc wrote:
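Use `auto *CI` here: `dyn_cast` returns a pointer, and the LLVM coding standards ask for `auto *` when the deduced type is a pointer.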
https://github.com/llvm/llvm-project/pull/126736
More information about the llvm-commits mailing list