[llvm] [LowerMemIntrinsics] Factor control flow generation out of the memcpy lowering (PR #169039)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 08:57:12 PST 2025
================
@@ -21,6 +21,219 @@
using namespace llvm;
+/// \returns \p Len urem \p OpSize, emitted with a cheaper instruction when the
+/// operation size allows it.
+/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
+static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
+ Value *OpSize, unsigned OpSizeVal) {
+ // Only a power-of-2 size can be handled with a mask; any other size needs a
+ // real urem.
+ if (!isPowerOf2_32(OpSizeVal))
+ return B.CreateURem(Len, OpSize);
+ // For a power of 2, the remainder is the low bits of Len, so a bitwise AND
+ // with (OpSizeVal - 1) replaces the urem.
+ const unsigned LowBitsMask = OpSizeVal - 1;
+ return B.CreateAnd(Len, LowBitsMask);
+}
+
+/// \returns (\p Len udiv \p OpSize) mul \p OpSize, emitted as \p Len minus the
+/// runtime loop remainder instead of an actual division and multiplication.
+/// A non-null \p RTLoopRemainder is reused as that remainder; it must be the
+/// result of \c getRuntimeLoopRemainder() with the same arguments. If it is
+/// null, the remainder is computed here.
+static Value *getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize,
+ unsigned OpSizeVal,
+ Value *RTLoopRemainder = nullptr) {
+ Value *Remainder =
+ RTLoopRemainder ? RTLoopRemainder
+ : getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
+ return B.CreateSub(Len, Remainder);
+}
+
+namespace {
+/// Bundle of the insertion points and indices returned by insertLoopExpansion.
+struct LoopExpansionInfo {
+ /// The instruction at the end of the main loop body (the index increment).
+ Instruction *MainLoopIP = nullptr;
+
+ /// The unit index in the main loop body (the loop-index PHI node).
+ Value *MainLoopIndex = nullptr;
+
+ /// The instruction at the end of the residual loop body. Can be nullptr if
+ /// no residual loop is required.
+ Instruction *ResidualLoopIP = nullptr;
+
+ /// The unit index in the residual loop body. Can be nullptr if no residual
+ /// loop is required.
+ Value *ResidualLoopIndex = nullptr;
+};
+} // namespace
+
+/// Insert the control flow and loop counters for a memcpy/memset loop
+/// expansion.
+///
+/// This function inserts IR corresponding to the following C code before
+/// \p InsertBefore:
+/// \code
+/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
+/// ResidualUnits = Len - LoopUnits;
+/// MainLoopIndex = 0;
+/// if (LoopUnits > 0) {
+/// do {
+/// // MainLoopIP
+/// MainLoopIndex += MainLoopStep;
+/// } while (MainLoopIndex < LoopUnits);
+/// }
+/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
+/// ResidualLoopIndex = LoopUnits + i;
+/// // ResidualLoopIP
+/// }
+/// \endcode
+///
+/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
+/// loop index is increased in each iteration of the main and residual loops,
+/// respectively. In most cases, the "unit" will be bytes, but larger units are
+/// useful for lowering memset.pattern.
+///
+/// The computation of \c LoopUnits and \c ResidualUnits is performed at compile
+/// time if \p Len is a \c ConstantInt.
+/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
+/// to \p MainLoopStep.
+/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
+/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
+static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
+ Value *Len, unsigned MainLoopStep,
+ unsigned ResidualLoopStep,
+ StringRef BBNamePrefix) {
+ assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
+ "ResidualLoopStep must divide MainLoopStep if specified");
+ assert(ResidualLoopStep <= MainLoopStep &&
+ "ResidualLoopStep cannot be larger than MainLoopStep");
+ assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
+ LoopExpansionInfo LEI;
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
+ InsertBefore, BBNamePrefix + "-post-expansion");
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+ IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+
+ // Calculate the main loop trip count and remaining units to cover after the
+ // loop.
+ Type *LenType = Len->getType();
+ IntegerType *ILenType = dyn_cast<IntegerType>(LenType);
+ assert(ILenType && "expected length to be an integer type!");
+ ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
+
+ Value *LoopUnits = Len;
+ Value *ResidualUnits = nullptr;
+ // We can make a conditional branch unconditional if we know that the
+ // MainLoop must be executed at least once.
+ bool MustTakeMainLoop = false;
+ if (MainLoopStep != 1) {
+ if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
+ uint64_t TotalUnits = CLen->getZExtValue();
+ uint64_t LoopEndCount = alignDown(TotalUnits, MainLoopStep);
+ uint64_t ResidualCount = TotalUnits - LoopEndCount;
+ LoopUnits = ConstantInt::get(LenType, LoopEndCount);
+ ResidualUnits = ConstantInt::get(LenType, ResidualCount);
+ MustTakeMainLoop = LoopEndCount > 0;
+ // As an optimization, we could skip generating the residual loop if
+ // ResidualCount is known to be 0. However, current uses of this function
+ // don't request a residual loop if the length is constant (they generate
+ // a (potentially empty) sequence of loads and stores instead), so this
+ // optimization would have no effect here.
+ } else {
+ ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
+ CIMainLoopStep, MainLoopStep);
+ LoopUnits = getRuntimeLoopUnits(PreLoopBuilder, Len, CIMainLoopStep,
+ MainLoopStep, ResidualUnits);
+ }
+ } else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
+ MustTakeMainLoop = CLen->getZExtValue() > 0;
+ }
+
+ BasicBlock *MainLoopBB = BasicBlock::Create(
+ Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
+ IRBuilder<> LoopBuilder(MainLoopBB);
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
+ LEI.MainLoopIndex = LoopIndex;
+ LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
+ LoopIndex->addIncoming(NewIndex, MainLoopBB);
+
+ // One argument of the addition is a loop-variant PHI, so it must be an
+ // Instruction (i.e., it cannot be a Constant).
+ LEI.MainLoopIP = cast<Instruction>(NewIndex);
+
+ if (0 < ResidualLoopStep && ResidualLoopStep < MainLoopStep) {
----------------
arsenm wrote:
```suggestion
if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
```
https://github.com/llvm/llvm-project/pull/169039
More information about the llvm-commits mailing list