[PATCH] D46228: [ELF] Use union-find set in Call-Chain Clustering (C³) heuristic to improve worst-case time complexity.

Fangrui Song via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 12 14:49:43 PDT 2018


MaskRay updated this revision to Diff 169497.
MaskRay added a comment.

Add a comment about findLeader, which uses the path-halving technique.


Repository:
  rLLD LLVM Linker

https://reviews.llvm.org/D46228

Files:
  ELF/CallGraphSort.cpp


Index: ELF/CallGraphSort.cpp
===================================================================
--- ELF/CallGraphSort.cpp
+++ ELF/CallGraphSort.cpp
@@ -46,6 +46,9 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 
+#include <list>
+#include <numeric>
+
 using namespace llvm;
 using namespace lld;
 using namespace lld::elf;
@@ -68,7 +71,7 @@
     return double(Weight) / double(Size);
   }
 
-  std::vector<int> Sections;
+  std::list<int> Sections;
   size_t Size = 0;
   uint64_t Weight = 0;
   uint64_t InitialWeight = 0;
@@ -155,55 +158,59 @@
   return false;
 }
 
+// Find the leader of the cluster that V belongs to (clusters are equivalence
+// classes in a union-find forest). While walking up, apply path halving: point
+// each visited node at its grandparent, shortening later lookups.
+static int findLeader(std::vector<int> &Leaders, int V) {
+  while (Leaders[V] != V) {
+    Leaders[V] = Leaders[Leaders[V]];
+    V = Leaders[V];
+  }
+  return V;
+}
+
 static void mergeClusters(Cluster &Into, Cluster &From) {
-  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
-                       From.Sections.end());
+  Into.Sections.splice(Into.Sections.end(), From.Sections);
   Into.Size += From.Size;
   Into.Weight += From.Weight;
-  From.Sections.clear();
   From.Size = 0;
   From.Weight = 0;
 }
 
 // Group InputSections into clusters using the Call-Chain Clustering heuristic
 // then sort the clusters by density.
 void CallGraphSort::groupClusters() {
   std::vector<int> SortedSecs(Clusters.size());
-  std::vector<Cluster *> SecToCluster(Clusters.size());
-
-  for (size_t I = 0; I < Clusters.size(); ++I) {
-    SortedSecs[I] = I;
-    SecToCluster[I] = &Clusters[I];
-  }
+  std::vector<int> Leaders(Clusters.size());
 
+  std::iota(Leaders.begin(), Leaders.end(), 0);
+  std::iota(SortedSecs.begin(), SortedSecs.end(), 0);
   std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
     return Clusters[B].getDensity() < Clusters[A].getDensity();
   });
 
   for (int SI : SortedSecs) {
     // Clusters[SI] is the same as SecToClusters[SI] here because it has not
     // been merged into another cluster yet.
-    Cluster &C = Clusters[SI];
+    int L = findLeader(Leaders, SI);
+    Cluster &C = Clusters[L];
 
     // Don't consider merging if the edge is unlikely.
     if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
       continue;
 
-    Cluster *PredC = SecToCluster[C.BestPred.From];
-    if (PredC == &C)
+    int PredL = findLeader(Leaders, C.BestPred.From);
+    if (L == PredL)
       continue;
 
+    Cluster *PredC = &Clusters[PredL];
     if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
       continue;
 
     if (isNewDensityBad(*PredC, C))
       continue;
 
-    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
-    // if this is ever slow.
-    for (int SI : C.Sections)
-      SecToCluster[SI] = PredC;
-
+    Leaders[L] = PredL;
     mergeClusters(*PredC, C);
   }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46228.169497.patch
Type: text/x-patch
Size: 2995 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181012/7436b621/attachment.bin>


More information about the llvm-commits mailing list