[llvm] r350207 - Revert rL350035 "[llvm-exegesis] Clustering: don't enqueue a point multiple times"

Clement Courbet via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 2 01:21:01 PST 2019


Author: courbet
Date: Wed Jan  2 01:21:00 2019
New Revision: 350207

URL: http://llvm.org/viewvc/llvm-project?rev=350207&view=rev
Log:
Revert rL350035 "[llvm-exegesis] Clustering: don't enqueue a point multiple times"

Let's discuss this on the review thread before submitting.

Modified:
    llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp

Modified: llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp?rev=350207&r1=350206&r2=350207&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp Wed Jan  2 01:21:00 2019
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Clustering.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include <string>
 
@@ -91,14 +92,8 @@ llvm::Error InstructionBenchmarkClusteri
 }
 
 void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
-  const size_t NumPoints = Points_.size();
-
-  // Persistent buffers to avoid allocs.
-  std::vector<size_t> Neighbors;
-  std::vector<size_t> ToProcess(NumPoints);
-  std::vector<char> Processed(NumPoints);
-
-  for (size_t P = 0; P < NumPoints; ++P) {
+  std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
+  for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
     if (!ClusterIdForPoint_[P].isUndef())
       continue; // Previously processed in inner loop.
     rangeQuery(P, Neighbors);
@@ -114,40 +109,43 @@ void InstructionBenchmarkClustering::dbS
     Cluster &CurrentCluster = Clusters_.back();
     ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
     CurrentCluster.PointIndices.push_back(P);
-    Processed[P] = 1;
 
-    // Enqueue P's neighbors.
-    size_t Tail = 0;
-    auto EnqueueUnprocessed = [&](const std::vector<size_t> &Neighbors) {
-      for (size_t Q : Neighbors)
-        if (!Processed[Q]) {
-          ToProcess[Tail++] = Q;
-          Processed[Q] = 1;
-        }
-    };
-    EnqueueUnprocessed(Neighbors);
-
-    for (size_t Head = 0; Head < Tail; ++Head) {
-      // Retrieve a point from the queue and add it to the current cluster.
-      P = ToProcess[Head];
-      ClusterId OldCID = ClusterIdForPoint_[P];
-      ClusterIdForPoint_[P] = CurrentCluster.Id;
-      CurrentCluster.PointIndices.push_back(P);
-      if (OldCID.isNoise())
+    // Process P's neighbors.
+    llvm::SetVector<size_t, std::deque<size_t>> ToProcess;
+    ToProcess.insert(Neighbors.begin(), Neighbors.end());
+    while (!ToProcess.empty()) {
+      // Retrieve a point from the set.
+      const size_t Q = *ToProcess.begin();
+      ToProcess.erase(ToProcess.begin());
+
+      if (ClusterIdForPoint_[Q].isNoise()) {
+        // Change noise point to border point.
+        ClusterIdForPoint_[Q] = CurrentCluster.Id;
+        CurrentCluster.PointIndices.push_back(Q);
         continue;
-      assert(OldCID.isUndef());
-
-      // And extend to the neighbors of P if the region is dense enough.
-      rangeQuery(P, Neighbors);
-      if (Neighbors.size() + 1 >= MinPts)
-        EnqueueUnprocessed(Neighbors);
+      }
+      if (!ClusterIdForPoint_[Q].isUndef()) {
+        continue; // Previously processed.
+      }
+      // Add Q to the current cluster.
+      ClusterIdForPoint_[Q] = CurrentCluster.Id;
+      CurrentCluster.PointIndices.push_back(Q);
+      // And extend to the neighbors of Q if the region is dense enough.
+      rangeQuery(Q, Neighbors);
+      if (Neighbors.size() + 1 >= MinPts) {
+        ToProcess.insert(Neighbors.begin(), Neighbors.end());
+      }
     }
   }
+  // assert(Neighbors.capacity() == (Points_.size() - 1));
+  // ^ True, but it is not guaranteed to be true in all the cases.
 
   // Add noisy points to noise cluster.
-  for (size_t P = 0; P < NumPoints; ++P)
-    if (ClusterIdForPoint_[P].isNoise())
+  for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
+    if (ClusterIdForPoint_[P].isNoise()) {
       NoiseCluster_.PointIndices.push_back(P);
+    }
+  }
 }
 
 llvm::Expected<InstructionBenchmarkClustering>




More information about the llvm-commits mailing list