[llvm] 051d330 - Avoid creating an immutable map in the Automaton class.

Marcello Maggioni via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 18:45:54 PST 2020


Author: Marcello Maggioni
Date: 2020-01-16T18:44:20-08:00
New Revision: 051d330314cb1f175025ca37da8e5e1d851e1790

URL: https://github.com/llvm/llvm-project/commit/051d330314cb1f175025ca37da8e5e1d851e1790
DIFF: https://github.com/llvm/llvm-project/commit/051d330314cb1f175025ca37da8e5e1d851e1790.diff

LOG: Avoid creating an immutable map in the Automaton class.

Summary:
In the DFAPacketizer we copy the Transitions array
into a map in order to later access the transitions
based on a "Current State/Action" pair as a key.
This map lives in the Automaton object used by the DFAPacketizer.
It is never changed during the life of the object after
having been created during the creation of the Automaton
itself.

This map creation can make the creation of a DFAPacketizer
quite expensive if the target contains a considerable
amount of transition states.

Considering that TableGen already generates a
sorted list of transitions by State/Action pairs
we could just use that directly in our Automaton
and search entries with std::lower_bound instead of copying
it in a map and paying the execution time and memory cost.

Reviewers: jmolloy, ThomasRaoux

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72682

Added: 
    

Modified: 
    llvm/include/llvm/Support/Automaton.h
    llvm/utils/TableGen/DFAEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/Automaton.h b/llvm/include/llvm/Support/Automaton.h
index c2b921311a8c..e945d74f936f 100644
--- a/llvm/include/llvm/Support/Automaton.h
+++ b/llvm/include/llvm/Support/Automaton.h
@@ -154,29 +154,42 @@ class NfaTranscriber {
 };
 } // namespace internal
 
+// Type representing a transition in the DFA state.
+// The action type can be a custom type.
+template <typename ActionT> struct TransitionType {
+  using StateT = unsigned;
+  StateT FromDfaState; // The transitioned-from DFA state.
+  ActionT Action;      // The input symbol that causes this transition.
+  StateT ToDfaState;   // The transitioned-to DFA state.
+  unsigned InfoIdx;    // Start index into TransitionInfo.
+};
+
 /// A deterministic finite-state automaton. The automaton is defined in
 /// TableGen; this object drives an automaton defined by tblgen-emitted tables.
 ///
 /// An automaton accepts a sequence of input tokens ("actions"). This class is
 /// templated on the type of these actions.
 template <typename ActionT> class Automaton {
+  using TransitionT = TransitionType<ActionT>;
+  using TransitionKeyT = std::pair<decltype(TransitionT::FromDfaState),
+                                   decltype(TransitionT::Action)>;
+  using StateT = typename TransitionT::StateT;
   /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
   /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
-  /// FIXME: This uses a std::map because ActionT can be a pair type including
-  /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
-  /// DenseMap here.
-  /// This is a shared_ptr to allow very quick copy-construction of Automata; this
-  /// state is immutable after construction so this is safe.
-  using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
-  std::shared_ptr<MapTy> M;
+  ArrayRef<TransitionT> TransitionMap;
   /// An optional transcription object. This uses much more state than simply
   /// traversing the DFA for acceptance, so is heap allocated.
   std::shared_ptr<internal::NfaTranscriber> Transcriber;
   /// The initial DFA state is 1.
-  uint64_t State = 1;
+  StateT State = 1;
   /// True if we should transcribe and false if not (even if Transcriber is defined).
   bool Transcribe;
 
+  static bool transitionLessThan(const TransitionT &A,
+                                 const TransitionKeyT &B) {
+    return std::make_pair(A.FromDfaState, A.Action) < B;
+  }
+
 public:
   /// Create an automaton.
   /// \param Transitions The Transitions table as created by TableGen. Note that
@@ -189,21 +202,24 @@ template <typename ActionT> class Automaton {
   /// NFA taken by the DFA. NOTE: This is substantially more work than simply
   /// driving the DFA, so unless you require the getPaths() method leave this
   /// empty.
-  template <typename InfoT>
-  Automaton(ArrayRef<InfoT> Transitions,
+  Automaton(ArrayRef<TransitionT> Transitions,
             ArrayRef<NfaStatePair> TranscriptionTable = {}) {
     if (!TranscriptionTable.empty())
       Transcriber =
           std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
     Transcribe = Transcriber != nullptr;
-    M = std::make_shared<MapTy>();
-    for (const auto &I : Transitions)
-      // Greedily read and cache the transition table.
-      M->emplace(std::make_pair(I.FromDfaState, I.Action),
-                 std::make_pair(I.ToDfaState, I.InfoIdx));
+    TransitionMap = Transitions;
+    assert(std::is_sorted(TransitionMap.begin(), TransitionMap.end(),
+                          [](const TransitionT &A, const TransitionT &B) {
+                            return std::make_pair(A.FromDfaState, A.Action) <
+                                   std::make_pair(B.FromDfaState, B.Action);
+                          }) &&
+           "The transitions array is expected to be sorted by FromDfaState and "
+           "Action");
   }
   Automaton(const Automaton &Other)
-      : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
+      : TransitionMap(Other.TransitionMap), State(Other.State),
+        Transcribe(Other.Transcribe) {
     // Transcriber is not thread-safe, so create a new instance on copy.
     if (Other.Transcriber)
       Transcriber = std::make_shared<internal::NfaTranscriber>(
@@ -233,19 +249,22 @@ template <typename ActionT> class Automaton {
   /// If this function returns false, all methods are undefined until reset() is
   /// called.
   bool add(const ActionT &A) {
-    auto I = M->find({State, A});
-    if (I == M->end())
+    auto I = lower_bound(TransitionMap.begin(), TransitionMap.end(),
+                         std::make_pair(State, A), transitionLessThan);
+    if (I == TransitionMap.end() || State != I->FromDfaState || A != I->Action)
       return false;
     if (Transcriber && Transcribe)
-      Transcriber->transition(I->second.second);
-    State = I->second.first;
+      Transcriber->transition(I->InfoIdx);
+    State = I->ToDfaState;
     return true;
   }
 
   /// Return true if the automaton can be transitioned based on input symbol A.
   bool canAdd(const ActionT &A) {
-    auto I = M->find({State, A});
-    return I != M->end();
+    auto I = lower_bound(TransitionMap.begin(), TransitionMap.end(),
+                         std::make_pair(State, A), transitionLessThan);
+    return I != TransitionMap.end() && State == I->FromDfaState &&
+           A == I->Action;
   }
 
   /// Obtain a set of possible paths through the input nondeterministic

diff  --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index dd3db7c150ba..af05551c7c32 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -131,15 +131,9 @@ void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
   OS << "}};\n\n";
 
   OS << "// A transition in the generated " << Name << " DFA.\n";
-  OS << "struct " << Name << "Transition {\n";
-  OS << "  unsigned FromDfaState; // The transitioned-from DFA state.\n";
-  OS << "  ";
+  OS << "using " << Name << "Transition = TransitionType<";
   printActionType(OS);
-  OS << " Action;       // The input symbol that causes this transition.\n";
-  OS << "  unsigned ToDfaState;   // The transitioned-to DFA state.\n";
-  OS << "  unsigned InfoIdx;      // Start index into " << Name
-     << "TransitionInfo.\n";
-  OS << "};\n\n";
+  OS << ">;\n\n";
 
   OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
   OS << "// The initial state is 1, not zero.\n";


        


More information about the llvm-commits mailing list