[llvm] [LoopInterchange] Add metadata to control loop-interchange (PR #127474)

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 10:17:13 PST 2025


================
@@ -569,6 +609,151 @@ struct LoopInterchange {
 
     return true;
   }
+
+  bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
+                          std::vector<std::vector<char>> &DependencyMatrix,
+                          const DenseMap<const Loop *, unsigned> &CostMap) {
+    bool Changed = false;
+
+    // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
+    DenseMap<Loop *, unsigned> Loop2Index;
+    for (unsigned I = 0; I != LoopList.size(); I++)
+      Loop2Index[LoopList[I]] = I;
+
+    // Hold outer loops to be exchanged, in the current nest order.
+    SmallVector<Loop *, 4> Worklist;
+
+    // Helper funciton to try to add a new loop into the Worklist. Return false
+    // if there is a duplicate in the loop to be interchanged.
+    auto AddLoopIfEnabled = [&](Loop *L) {
+      if (findMetadata(L) == true) {
+        if (!Worklist.empty()) {
+          // Because the loops are sorted in the order of the current nest, it
+          // is sufficient to compare with the last element.
+          unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
+          unsigned OuterLoopId = Loop2Index[L];
+          if (OuterLoopId <= InnerLoopId) {
+            ORE->emit([&]() {
+              return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
+                                              L->getStartLoc(), L->getHeader())
+                     << "The loops to be interchanged are overlapping.";
+            });
+            return false;
+          }
+        }
+        Worklist.push_back(L);
+      }
+      return true;
+    };
+
+    // Initialize Worklist. To process the loops in inner-loop-first order, add
+    // them to the worklist in the outer-loop-first order.
+    for (unsigned I = 0; I != LoopList.size(); I++)
+      if (!AddLoopIfEnabled(LoopList[I]))
+        return Changed;
+
+    // Set an upper bound of the number of transformations to avoid infinite
+    // loop. There is no deep meaning behind the current value (square of the
+    // size of LoopList).
+    // TODO: Is this really necessary?
+    const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
+    unsigned Attempts = 0;
+
+    // Process the loops. An exchange is applied to two loops, but a metadata
+    // replacement can be applied to three loops: the two loops plus the next
+    // outer loop, if it exists. This is because it's necessary to express the
+    // information about the order of the application of interchanges in cases
+    // where the target loops to be exchanged are overlapping, e.g.,
+    //
+    // #pragma clang loop interchange(enable)
+    // for(int i=0;i<N;i++)
+    //   #pragma clang loop interchange(enable)
+    //   for (int j=0;j<N;j++)
+    //     for (int k=0;k<N;k++)
+    //       ...
+    //
+    // In this case we will exchange the innermost two loops at first, the
+    // follow-up metadata including enabling interchange is attached on the
+    // outermost loop, and it is enqueued as the next candidate to be processed.
+    while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
+      Loop *TargetLoop = Worklist.pop_back_val();
+      assert(findMetadata(TargetLoop) == true &&
+             "Some metadata was unexpectedlly removed");
+      unsigned OuterLoopId = Loop2Index[TargetLoop];
+      unsigned InnerLoopId = OuterLoopId + 1;
+      if (InnerLoopId >= LoopList.size()) {
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
+                                          TargetLoop->getStartLoc(),
+                                          TargetLoop->getHeader())
+                 << "The metadata is invalid with an innermost loop.";
+        });
+        break;
+      }
+      MDNode *LoopID = TargetLoop->getLoopID();
+      bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
+                                      DependencyMatrix, CostMap);
+      if (!Interchanged) {
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
+                                          TargetLoop->getStartLoc(),
+                                          TargetLoop->getHeader())
+                 << "Failed to perform explicitly specified loop interchange.";
+        });
+        break;
+      }
+
+      // The next outer loop, or nullptr if TargetLoop is the outermost one.
+      Loop *NextOuterLoop = nullptr;
+      if (0 < OuterLoopId)
+        NextOuterLoop = LoopList[OuterLoopId - 1];
+      Loop *OuterLoop = LoopList[OuterLoopId];
+      Loop *InnerLoop = LoopList[InnerLoopId];
+      Attempts++;
+      Changed = true;
+      Loop2Index[OuterLoop] = OuterLoopId;
+      Loop2Index[InnerLoop] = InnerLoopId;
+
+      // Update the metadata.
+      std::optional<MDNode *> MDNextOuterLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupNextOuter});
+      std::optional<MDNode *> MDOuterLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupOuter});
+      std::optional<MDNode *> MDInnerLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupInner});
+      if (MDNextOuterLoopID) {
+        if (NextOuterLoop) {
+          NextOuterLoop->setLoopID(*MDNextOuterLoopID);
+        } else {
+          LLVM_DEBUG(dbgs()
+                     << "New metadata for the next outer loop is ignored.\n");
+        }
+      }
+      if (MDOuterLoopID)
+        OuterLoop->setLoopID(*MDOuterLoopID);
+      if (MDInnerLoopID)
+        InnerLoop->setLoopID(*MDInnerLoopID);
----------------
Meinersbur wrote:

Loops should be re-added to the worklist, so any followup loop interchange can be processed.

Other passes apply just one transformation, usually because doing it multiple times is nonsensical. E.g., you would not want to vectorize a loop twice.

https://github.com/llvm/llvm-project/pull/127474


More information about the llvm-commits mailing list