[PATCH] An attempt to make the search algorithm easier to understand.

Tue Feb 12 13:33:53 PST 2013

Hi djasper,

- clear ownership: the SpecificBumpPtrAllocator owns all StateEdges
- this allows us to simplify the memoization data structure into a
  std::set (FIXME: figure out whether we want to use a hash based
  data structure).
- introduces StateEdge as a recursive data structure, instead of using
  Edge and the Seen-map combined to drill through the graph
- using a count to stabilize the penalty instead of relying on the
  container
- pulled out a method to forward-apply states at the end

This leads to a ~40% runtime decrease on Nico's benchmark.

The main FIXME is that the parameter lists of some functions get too long.
I'd vote for either pulling the Queue etc. into the Formatter proper,
or creating an inner class just for the search algorithm.

http://llvm-reviews.chandlerc.com/D398

Files:
  lib/Format/Format.cpp

Index: lib/Format/Format.cpp
===================================================================

--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -23,7 +23,9 @@
 #include "clang/Format/Format.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
+#include <queue>
 #include <string>
 
 namespace clang {
@@ -633,94 +635,124 @@
     return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
   }
 
-  /// \brief An edge in the solution space starting from the \c LineState and
-  /// inserting a newline dependent on the \c bool.
-  typedef std::pair<bool, const LineState *> Edge;
+  /// \brief An edge in the solution space from \c Previous->State to \c State,
+  /// inserting a newline dependent on the \c NewLine.
+  struct StateEdge {
+    StateEdge(const LineState &State, bool NewLine, StateEdge *Previous)
+        : State(State), NewLine(NewLine), Previous(Previous) {
+    }
+    LineState State;
+    bool NewLine;
+    StateEdge *Previous;
+  };
+
+  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+  ///
+  /// In case of equal penalties, we want to prefer states that were inserted
+  /// first. During state generation we make sure that we insert states first
+  /// that break the line as late as possible.
+  typedef std::pair<unsigned, unsigned> StablePenalty;
+
+  /// \brief An item in the prioritized BFS search queue.
+  typedef std::pair<StablePenalty, StateEdge *> QueueItem;
 
-  /// \brief An item in the prioritized BFS search queue. The \c LineState was
-  /// reached using the \c Edge.
-  typedef std::pair<LineState, Edge> QueueItem;
+  /// \brief The BFS queue type.
+  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+                              std::greater<QueueItem> > QueueType;
 
   /// \brief Analyze the entire solution space starting from \p InitialState.
   ///
   /// This implements a variant of Dijkstra's algorithm on the graph that spans
   /// the solution space (\c LineStates are the nodes). The algorithm tries to
   /// find the shortest path (the one with lowest penalty) from \p InitialState
   /// to a state where all tokens are placed.
-  unsigned analyzeSolutionSpace(const LineState &InitialState) {
+  unsigned analyzeSolutionSpace(LineState &InitialState) {
+    llvm::SpecificBumpPtrAllocator<StateEdge> Allocator;
+
+    // Increasing count of \c StateEdge items we have created. This is used
+    // to stabilize the priority queue.
+    unsigned Count = 0;
+    QueueType Queue;
+
+    std::set<LineState> Seen;
+
     // Insert start element into queue.
-    std::multimap<unsigned, QueueItem> Queue;
-    Queue.insert(std::pair<unsigned, QueueItem>(
-        0, QueueItem(InitialState, Edge(false, (const LineState *)0))));
-    std::map<LineState, Edge> Seen;
+    StateEdge *State =
+        new (Allocator.Allocate()) StateEdge(InitialState, false, NULL);
+    Queue.push(QueueItem(StablePenalty(0, Count), State));
+    ++Count;
 
     // While not empty, take first element and follow edges.
     while (!Queue.empty()) {
-      unsigned Penalty = Queue.begin()->first;
-      QueueItem Item = Queue.begin()->second;
-      if (Item.first.NextToken == NULL) {
+      unsigned Penalty = Queue.top().first.first;
+      StateEdge *Edge = Queue.top().second;
+      if (Edge->State.NextToken == NULL) {
         DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
         break;
       }
-      Queue.erase(Queue.begin());
+      Queue.pop();
 
-      if (Seen.find(Item.first) != Seen.end())
-        continue; // State already examined with lower penalty.
-
-      const LineState &SavedState = Seen.insert(std::pair<LineState, Edge>(
-          Item.first,
-          Edge(Item.second.first, Item.second.second))).first->first;
+      if (!Seen.insert(Edge->State).second)
+        // State already examined with lower penalty.
+        continue;
 
-      addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ false, Queue);
-      addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ true, Queue);
+      addNextStateToQueue(Allocator, Queue, Count, Penalty, Edge,
+                          /*NewLine=*/ false);
+      addNextStateToQueue(Allocator, Queue, Count, Penalty, Edge,
+                          /*NewLine=*/ true);
     }
 
     if (Queue.empty())
       // We were unable to find a solution, do nothing.
       // FIXME: Add diagnostic?
       return 0;
 
     // Reconstruct the solution.
-    // FIXME: Add debugging output.
-    Edge *CurrentEdge = &Queue.begin()->second.second;
-    while (CurrentEdge->second != NULL) {
-      LineState CurrentState = *CurrentEdge->second;
-      if (CurrentEdge->first) {
-        DEBUG(llvm::errs() << "Penalty for splitting before "
-                           << CurrentState.NextToken->FormatTok.Tok.getName()
-                           << ": " << CurrentState.NextToken->SplitPenalty
-                           << "\n");
-      }
-      addTokenToState(CurrentEdge->first, false, CurrentState);
-      CurrentEdge = &Seen[*CurrentEdge->second];
-    }
+    apply(InitialState, Queue.top().second);
     DEBUG(llvm::errs() << "---\n");
 
     // Return the column after the last token of the solution.
-    return Queue.begin()->second.first.Column;
+    return Queue.top().second->State.Column;
+  }
+
+  void apply(LineState &State, StateEdge *Current) {
+    if (Current->Previous == NULL)
+      return;
+    apply(State, Current->Previous);
+    DEBUG({
+      if (Current->NewLine) {
+        llvm::errs() << "Penalty for splitting before "
+                     << Current->State.NextToken->FormatTok.Tok.getName()
+                     << ": " << Current->State.NextToken->SplitPenalty << "\n";
+      }
+    });
+    addTokenToState(Current->NewLine, false, State);
   }
 
   /// \brief Add the following state to the analysis queue \p Queue.
   ///
   /// Assume the current state is \p OldState and has been reached with a
   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
-  void addNextStateToQueue(const LineState &OldState, unsigned Penalty,
-                           bool NewLine,
-                           std::multimap<unsigned, QueueItem> &Queue) {
-    if (NewLine && !canBreak(OldState))
+  void addNextStateToQueue(llvm::SpecificBumpPtrAllocator<StateEdge> &Allocator,
+                           QueueType &Queue, unsigned &Count, unsigned Penalty,
+                           StateEdge *Old, bool NewLine) {
+    if (NewLine && !canBreak(Old->State))
       return;
-    if (!NewLine && mustBreak(OldState))
+    if (!NewLine && mustBreak(Old->State))
       return;
-    LineState State(OldState);
     if (NewLine)
-      Penalty += State.NextToken->SplitPenalty;
-    addTokenToState(NewLine, true, State);
-    if (State.Column > getColumnLimit()) {
-      unsigned ExcessCharacters = State.Column - getColumnLimit();
+      Penalty += Old->State.NextToken->SplitPenalty;
+
+    StateEdge *Edge =
+        new (Allocator.Allocate()) StateEdge(Old->State, NewLine, Old);
+    addTokenToState(NewLine, true, Edge->State);
+    if (Edge->State.Column > getColumnLimit()) {
+      unsigned ExcessCharacters = Edge->State.Column - getColumnLimit();
       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
     }
-    Queue.insert(std::pair<unsigned, QueueItem>(
-        Penalty, QueueItem(State, Edge(NewLine, &OldState))));
+
+    Queue.push(QueueItem(StablePenalty(Penalty, Count), Edge));
+    ++Count;
   }
 
   /// \brief Returns \c true, if a line break after \p State is allowed.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D398.1.patch
Type: text/x-patch
Size: 7676 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130212/ba915b0b/attachment.bin>