[llvm] [LoopInterchange] Add metadata to control loop-interchange (PR #127474)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 10:01:35 PST 2025
================
@@ -569,6 +609,151 @@ struct LoopInterchange {
return true;
}
+
+ bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ bool Changed = false;
+
+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
+ DenseMap<Loop *, unsigned> Loop2Index;
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ Loop2Index[LoopList[I]] = I;
+
+ // Hold outer loops to be exchanged, in the current nest order.
+ SmallVector<Loop *, 4> Worklist;
+
+ // Helper funciton to try to add a new loop into the Worklist. Return false
+ // if there is a duplicate in the loop to be interchanged.
+ auto AddLoopIfEnabled = [&](Loop *L) {
+ if (findMetadata(L) == true) {
+ if (!Worklist.empty()) {
+ // Because the loops are sorted in the order of the current nest, it
+ // is sufficient to compare with the last element.
+ unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
+ unsigned OuterLoopId = Loop2Index[L];
+ if (OuterLoopId <= InnerLoopId) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
+ L->getStartLoc(), L->getHeader())
+ << "The loops to be interchanged are overlapping.";
+ });
+ return false;
+ }
+ }
+ Worklist.push_back(L);
+ }
+ return true;
+ };
+
+ // Initialize Worklist. To process the loops in inner-loop-first order, add
+ // them to the worklist in the outer-loop-first order.
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ if (!AddLoopIfEnabled(LoopList[I]))
+ return Changed;
+
+ // Set an upper bound of the number of transformations to avoid infinite
+ // loop. There is no deep meaning behind the current value (square of the
+ // size of LoopList).
+ // TODO: Is this really necessary?
+ const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
+ unsigned Attempts = 0;
+
+ // Process the loops. An exchange is applied to two loops, but a metadata
+ // replacement can be applied to three loops: the two loops plus the next
+ // outer loop, if it exists. This is because it's necessary to express the
+ // information about the order of the application of interchanges in cases
+ // where the target loops to be exchanged are overlapping, e.g.,
+ //
+ // #pragma clang loop interchange(enable)
+ // for(int i=0;i<N;i++)
+ // #pragma clang loop interchange(enable)
+ // for (int j=0;j<N;j++)
+ // for (int k=0;k<N;k++)
+ // ...
+ //
+ // In this case we will exchange the innermost two loops at first, the
+ // follow-up metadata including enabling interchange is attached on the
+ // outermost loop, and it is enqueued as the next candidate to be processed.
+ while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
+ Loop *TargetLoop = Worklist.pop_back_val();
+ assert(findMetadata(TargetLoop) == true &&
+ "Some metadata was unexpectedlly removed");
+ unsigned OuterLoopId = Loop2Index[TargetLoop];
+ unsigned InnerLoopId = OuterLoopId + 1;
+ if (InnerLoopId >= LoopList.size()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "The metadata is invalid with an innermost loop.";
+ });
+ break;
+ }
+ MDNode *LoopID = TargetLoop->getLoopID();
+ bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap);
+ if (!Interchanged) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "Failed to perform explicitly specified loop interchange.";
+ });
+ break;
+ }
+
+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
+ Loop *NextOuterLoop = nullptr;
+ if (0 < OuterLoopId)
+ NextOuterLoop = LoopList[OuterLoopId - 1];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Attempts++;
+ Changed = true;
+ Loop2Index[OuterLoop] = OuterLoopId;
+ Loop2Index[InnerLoop] = InnerLoopId;
+
+ // Update the metadata.
+ std::optional<MDNode *> MDNextOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupNextOuter});
+ std::optional<MDNode *> MDOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupOuter});
+ std::optional<MDNode *> MDInnerLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupInner});
+ if (MDNextOuterLoopID) {
+ if (NextOuterLoop) {
+ NextOuterLoop->setLoopID(*MDNextOuterLoopID);
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "New metadata for the next outer loop is ignored.\n");
+ }
+ }
+ if (MDOuterLoopID)
+ OuterLoop->setLoopID(*MDOuterLoopID);
+ if (MDInnerLoopID)
+ InnerLoop->setLoopID(*MDInnerLoopID);
----------------
kasuga-fj wrote:
Because this is inside the function `processEnabledLoop`, this metadata handling is not performed by default (i.e., when `OnlyWhenForced` is false). However, if I understand correctly, follow-up metadata will be generated whenever the pragma enabling interchange is specified, e.g., in the following case:
```c
// compilation options: -O3 -floop-interchange
#pragma clang loop interchange(enable) unroll(disable) // Enabling interchange by pragma doesn't make sense, since loop-interchange is enabled by the compilation option.
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
...
```
So should we also handle the follow-up metadata in `processLoopList`? If my understanding is correct, none of the other loop optimization passes (like unroll, distribute, etc.) handle it in a similar situation.
https://github.com/llvm/llvm-project/pull/127474
More information about the llvm-commits
mailing list