[llvm] [LowerMemIntrinsics] Factor control flow generation out of the memcpy lowering (PR #169039)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 2 08:57:12 PST 2025


================
@@ -21,6 +21,219 @@
 
 using namespace llvm;
 
+/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
+/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
+static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
+                                      Value *OpSize, unsigned OpSizeVal) {
+  // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
+  if (isPowerOf2_32(OpSizeVal))
+    return B.CreateAnd(Len, OpSizeVal - 1);
+  return B.CreateURem(Len, OpSize);
+}
+
+/// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
+/// opportunities.
+/// If \p RTLoopRemainder is provided, it must be the result of
+/// \c getRuntimeLoopRemainder() with the same arguments.
+static Value *getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize,
+                                  unsigned OpSizeVal,
+                                  Value *RTLoopRemainder = nullptr) {
+  if (!RTLoopRemainder)
+    RTLoopRemainder = getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
+  return B.CreateSub(Len, RTLoopRemainder);
+}
+
+namespace {
+/// Container for the return values of insertLoopExpansion.
+struct LoopExpansionInfo {
+  /// The instruction at the end of the main loop body.
+  Instruction *MainLoopIP = nullptr;
+
+  /// The unit index in the main loop body.
+  Value *MainLoopIndex = nullptr;
+
+  /// The instruction at the end of the residual loop body. Can be nullptr if no
+  /// residual is required.
+  Instruction *ResidualLoopIP = nullptr;
+
+  /// The unit index in the residual loop body. Can be nullptr if no residual is
+  /// required.
+  Value *ResidualLoopIndex = nullptr;
+};
+} // namespace
+
+/// Insert the control flow and loop counters for a memcpy/memset loop
+/// expansion.
+///
+/// This function inserts IR corresponding to the following C code before
+/// \p InsertBefore:
+/// \code
+/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
+/// ResidualUnits = Len - LoopUnits;
+/// MainLoopIndex = 0;
+/// if (LoopUnits > 0) {
+///   do {
+///     // MainLoopIP
+///     MainLoopIndex += MainLoopStep;
+///   } while (MainLoopIndex < LoopUnits);
+/// }
+/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
+///   ResidualLoopIndex = LoopUnits + i;
+///   // ResidualLoopIP
+/// }
+/// \endcode
+///
+/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
+/// loop index is increased in each iteration of the main and residual loops,
+/// respectively. In most cases, the "unit" will be bytes, but larger units are
+/// useful for lowering memset.pattern.
+///
+/// The computation of \c LoopUnits and \c ResidualUnits is performed at compile
+/// time if \p Len is a \c ConstantInt.
+/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
+/// to \p MainLoopStep.
+/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
+/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
+static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
+                                             Value *Len, unsigned MainLoopStep,
+                                             unsigned ResidualLoopStep,
+                                             StringRef BBNamePrefix) {
+  assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
+         "ResidualLoopStep must divide MainLoopStep if specified");
+  assert(ResidualLoopStep <= MainLoopStep &&
+         "ResidualLoopStep cannot be larger than MainLoopStep");
+  assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
+  LoopExpansionInfo LEI;
+  BasicBlock *PreLoopBB = InsertBefore->getParent();
+  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
+      InsertBefore, BBNamePrefix + "-post-expansion");
+  Function *ParentFunc = PreLoopBB->getParent();
+  LLVMContext &Ctx = PreLoopBB->getContext();
+  IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+
+  // Calculate the main loop trip count and remaining units to cover after the
+  // loop.
+  Type *LenType = Len->getType();
+  IntegerType *ILenType = dyn_cast<IntegerType>(LenType);
+  assert(ILenType && "expected length to be an integer type!");
+  ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
+
+  Value *LoopUnits = Len;
+  Value *ResidualUnits = nullptr;
+  // We can make a conditional branch unconditional if we know that the
+  // MainLoop must be executed at least once.
+  bool MustTakeMainLoop = false;
+  if (MainLoopStep != 1) {
+    if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
+      uint64_t TotalUnits = CLen->getZExtValue();
+      uint64_t LoopEndCount = alignDown(TotalUnits, MainLoopStep);
+      uint64_t ResidualCount = TotalUnits - LoopEndCount;
+      LoopUnits = ConstantInt::get(LenType, LoopEndCount);
+      ResidualUnits = ConstantInt::get(LenType, ResidualCount);
+      MustTakeMainLoop = LoopEndCount > 0;
+      // As an optimization, we could skip generating the residual loop if
+      // ResidualCount is known to be 0. However, current uses of this function
+      // don't request a residual loop if the length is constant (they generate
+      // a (potentially empty) sequence of loads and stores instead), so this
+      // optimization would have no effect here.
+    } else {
+      ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
+                                              CIMainLoopStep, MainLoopStep);
+      LoopUnits = getRuntimeLoopUnits(PreLoopBuilder, Len, CIMainLoopStep,
+                                      MainLoopStep, ResidualUnits);
+    }
+  } else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
+    MustTakeMainLoop = CLen->getZExtValue() > 0;
+  }
+
+  BasicBlock *MainLoopBB = BasicBlock::Create(
+      Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
+  IRBuilder<> LoopBuilder(MainLoopBB);
+
+  PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
+  LEI.MainLoopIndex = LoopIndex;
+  LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
+
+  Value *NewIndex =
+      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
+  LoopIndex->addIncoming(NewIndex, MainLoopBB);
+
+  // One argument of the addition is a loop-variant PHI, so it must be an
+  // Instruction (i.e., it cannot be a Constant).
+  LEI.MainLoopIP = cast<Instruction>(NewIndex);
+
+  if (0 < ResidualLoopStep && ResidualLoopStep < MainLoopStep) {
----------------
arsenm wrote:

```suggestion
  if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
```

https://github.com/llvm/llvm-project/pull/169039


More information about the llvm-commits mailing list