[llvm] [LoopInterchange] Add metadata to control loop-interchange (PR #127474)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 10:01:35 PST 2025


================
@@ -569,6 +609,151 @@ struct LoopInterchange {
 
     return true;
   }
+
+  bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
+                          std::vector<std::vector<char>> &DependencyMatrix,
+                          const DenseMap<const Loop *, unsigned> &CostMap) {
+    bool Changed = false;
+
+    // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
+    DenseMap<Loop *, unsigned> Loop2Index;
+    for (unsigned I = 0; I != LoopList.size(); I++)
+      Loop2Index[LoopList[I]] = I;
+
+    // Hold outer loops to be exchanged, in the current nest order.
+    SmallVector<Loop *, 4> Worklist;
+
+    // Helper function to try to add a new loop into the Worklist. Return false
+    // if there is a duplicate in the loop to be interchanged.
+    auto AddLoopIfEnabled = [&](Loop *L) {
+      if (findMetadata(L) == true) {
+        if (!Worklist.empty()) {
+          // Because the loops are sorted in the order of the current nest, it
+          // is sufficient to compare with the last element.
+          unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
+          unsigned OuterLoopId = Loop2Index[L];
+          if (OuterLoopId <= InnerLoopId) {
+            ORE->emit([&]() {
+              return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
+                                              L->getStartLoc(), L->getHeader())
+                     << "The loops to be interchanged are overlapping.";
+            });
+            return false;
+          }
+        }
+        Worklist.push_back(L);
+      }
+      return true;
+    };
+
+    // Initialize Worklist. To process the loops in inner-loop-first order, add
+    // them to the worklist in the outer-loop-first order.
+    for (unsigned I = 0; I != LoopList.size(); I++)
+      if (!AddLoopIfEnabled(LoopList[I]))
+        return Changed;
+
+    // Set an upper bound of the number of transformations to avoid infinite
+    // loop. There is no deep meaning behind the current value (square of the
+    // size of LoopList).
+    // TODO: Is this really necessary?
+    const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
+    unsigned Attempts = 0;
+
+    // Process the loops. An exchange is applied to two loops, but a metadata
+    // replacement can be applied to three loops: the two loops plus the next
+    // outer loop, if it exists. This is because it's necessary to express the
+    // information about the order of the application of interchanges in cases
+    // where the target loops to be exchanged are overlapping, e.g.,
+    //
+    // #pragma clang loop interchange(enable)
+    // for(int i=0;i<N;i++)
+    //   #pragma clang loop interchange(enable)
+    //   for (int j=0;j<N;j++)
+    //     for (int k=0;k<N;k++)
+    //       ...
+    //
+    // In this case we will exchange the innermost two loops at first, the
+    // follow-up metadata including enabling interchange is attached on the
+    // outermost loop, and it is enqueued as the next candidate to be processed.
+    while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
+      Loop *TargetLoop = Worklist.pop_back_val();
+      assert(findMetadata(TargetLoop) == true &&
+             "Some metadata was unexpectedlly removed");
+      unsigned OuterLoopId = Loop2Index[TargetLoop];
+      unsigned InnerLoopId = OuterLoopId + 1;
+      if (InnerLoopId >= LoopList.size()) {
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
+                                          TargetLoop->getStartLoc(),
+                                          TargetLoop->getHeader())
+                 << "The metadata is invalid with an innermost loop.";
+        });
+        break;
+      }
+      MDNode *LoopID = TargetLoop->getLoopID();
+      bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
+                                      DependencyMatrix, CostMap);
+      if (!Interchanged) {
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
+                                          TargetLoop->getStartLoc(),
+                                          TargetLoop->getHeader())
+                 << "Failed to perform explicitly specified loop interchange.";
+        });
+        break;
+      }
+
+      // The next outer loop, or nullptr if TargetLoop is the outermost one.
+      Loop *NextOuterLoop = nullptr;
+      if (0 < OuterLoopId)
+        NextOuterLoop = LoopList[OuterLoopId - 1];
+      Loop *OuterLoop = LoopList[OuterLoopId];
+      Loop *InnerLoop = LoopList[InnerLoopId];
+      Attempts++;
+      Changed = true;
+      Loop2Index[OuterLoop] = OuterLoopId;
+      Loop2Index[InnerLoop] = InnerLoopId;
+
+      // Update the metadata.
+      std::optional<MDNode *> MDNextOuterLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupNextOuter});
+      std::optional<MDNode *> MDOuterLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupOuter});
+      std::optional<MDNode *> MDInnerLoopID =
+          makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+                                      LLVMLoopInterchangeFollowupInner});
+      if (MDNextOuterLoopID) {
+        if (NextOuterLoop) {
+          NextOuterLoop->setLoopID(*MDNextOuterLoopID);
+        } else {
+          LLVM_DEBUG(dbgs()
+                     << "New metadata for the next outer loop is ignored.\n");
+        }
+      }
+      if (MDOuterLoopID)
+        OuterLoop->setLoopID(*MDOuterLoopID);
+      if (MDInnerLoopID)
+        InnerLoop->setLoopID(*MDInnerLoopID);
----------------
kasuga-fj wrote:

Because this is inside the function `processEnabledLoop`, this metadata handling is not performed by default (i.e., when `OnlyWhenForced` is false). However, if I understand it correctly, follow-up metadata will be generated whenever the pragma enabling interchange is specified, e.g., in the following case:

```c
// compilation options: -O3 -floop-interchange

#pragma clang loop interchange(enable) unroll(disable)  // Enabling interchange by pragma doesn't make sense, since loop-interchange is enabled by the compilation option.
for (int i = 0; i < N; i++)
  for (int j = 0; j < N; j++)
    ...
```

So should we also handle the follow-up metadata in `processLoopList`? If my understanding is correct, none of the other loop optimization passes (like unroll, distribute, etc.) handle it in a similar situation.

https://github.com/llvm/llvm-project/pull/127474


More information about the llvm-commits mailing list