[llvm] [LoopInterchange] Add metadata to control loop-interchange (PR #127474)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 04:08:39 PDT 2025
================
@@ -569,6 +609,151 @@ struct LoopInterchange {
return true;
}
+
+ bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ bool Changed = false;
+
+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
+ DenseMap<Loop *, unsigned> Loop2Index;
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ Loop2Index[LoopList[I]] = I;
+
+ // Hold outer loops to be exchanged, in the current nest order.
+ SmallVector<Loop *, 4> Worklist;
+
+ // Helper function to try to add a new loop into the Worklist. Return false
+ // if there is a duplicate in the loop to be interchanged.
+ auto AddLoopIfEnabled = [&](Loop *L) {
+ if (findMetadata(L) == true) {
+ if (!Worklist.empty()) {
+ // Because the loops are sorted in the order of the current nest, it
+ // is sufficient to compare with the last element.
+ unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
+ unsigned OuterLoopId = Loop2Index[L];
+ if (OuterLoopId <= InnerLoopId) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
+ L->getStartLoc(), L->getHeader())
+ << "The loops to be interchanged are overlapping.";
+ });
+ return false;
+ }
+ }
+ Worklist.push_back(L);
+ }
+ return true;
+ };
+
+ // Initialize Worklist. To process the loops in inner-loop-first order, add
+ // them to the worklist in the outer-loop-first order.
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ if (!AddLoopIfEnabled(LoopList[I]))
+ return Changed;
+
+ // Set an upper bound of the number of transformations to avoid infinite
+ // loop. There is no deep meaning behind the current value (square of the
+ // size of LoopList).
+ // TODO: Is this really necessary?
+ const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
+ unsigned Attempts = 0;
+
+ // Process the loops. An exchange is applied to two loops, but a metadata
+ // replacement can be applied to three loops: the two loops plus the next
+ // outer loop, if it exists. This is because it's necessary to express the
+ // information about the order of the application of interchanges in cases
+ // where the target loops to be exchanged are overlapping, e.g.,
+ //
+ // #pragma clang loop interchange(enable)
+ // for(int i=0;i<N;i++)
+ // #pragma clang loop interchange(enable)
+ // for (int j=0;j<N;j++)
+ // for (int k=0;k<N;k++)
+ // ...
+ //
+ // In this case we first exchange the innermost two loops. The follow-up
+ // metadata, which includes enabling interchange, is then attached to the
+ // outermost loop, and that loop is enqueued as the next candidate to be
+ // processed.
+ while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
+ Loop *TargetLoop = Worklist.pop_back_val();
+ assert(findMetadata(TargetLoop) == true &&
+ "Some metadata was unexpectedlly removed");
+ unsigned OuterLoopId = Loop2Index[TargetLoop];
+ unsigned InnerLoopId = OuterLoopId + 1;
+ if (InnerLoopId >= LoopList.size()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "The metadata is invalid with an innermost loop.";
+ });
+ break;
+ }
+ MDNode *LoopID = TargetLoop->getLoopID();
+ bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap);
+ if (!Interchanged) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "Failed to perform explicitly specified loop interchange.";
+ });
+ break;
+ }
+
+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
+ Loop *NextOuterLoop = nullptr;
+ if (0 < OuterLoopId)
+ NextOuterLoop = LoopList[OuterLoopId - 1];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Attempts++;
+ Changed = true;
+ Loop2Index[OuterLoop] = OuterLoopId;
+ Loop2Index[InnerLoop] = InnerLoopId;
+
+ // Update the metadata.
+ std::optional<MDNode *> MDNextOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupNextOuter});
+ std::optional<MDNode *> MDOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupOuter});
+ std::optional<MDNode *> MDInnerLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupInner});
+ if (MDNextOuterLoopID) {
+ if (NextOuterLoop) {
+ NextOuterLoop->setLoopID(*MDNextOuterLoopID);
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "New metadata for the next outer loop is ignored.\n");
+ }
+ }
+ if (MDOuterLoopID)
+ OuterLoop->setLoopID(*MDOuterLoopID);
+ if (MDInnerLoopID)
+ InnerLoop->setLoopID(*MDInnerLoopID);
----------------
kasuga-fj wrote:
I misunderstood other passes' behavior.
> Loops should be re-added to the worklist, so any followup loop interchange can be processed.
Yes, this is already done.
My concern is how to handle metadata when we use the bubble sort algorithm. Do the changes in https://github.com/llvm/llvm-project/pull/127474/commits/7eff317c65c2849866090384c47357f2e32aa1f7 make sense?
https://github.com/llvm/llvm-project/pull/127474
More information about the llvm-commits
mailing list