[clang-tools-extra] cd2292e - [pseudo] A basic implementation of compiling cxx grammar at build time.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Wed May 25 02:26:16 PDT 2022


Author: Haojian Wu
Date: 2022-05-25T11:26:06+02:00
New Revision: cd2292ef824591cc34cc299910a3098545c840c7

URL: https://github.com/llvm/llvm-project/commit/cd2292ef824591cc34cc299910a3098545c840c7
DIFF: https://github.com/llvm/llvm-project/commit/cd2292ef824591cc34cc299910a3098545c840c7.diff

LOG: [pseudo] A basic implementation of compiling cxx grammar at build time.

The main idea is to compile the cxx grammar at build time, and construct
the core pieces (Grammar, LRTable) of the pseudoparser based on the compiled
data sources.

This is a tiny implementation, which is a good start:

- defines what the public API should look like;
- integrates the cxx grammar compilation workflow with the cmake system.
- only nonterminal symbols of the C++ grammar are compiled; everything
  else still does the real compilation work at runtime, and we can opt in more
  bits in the future;
- splits the monolithic clangPseudo library for better layering;

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D125667

Added: 
    clang-tools-extra/pseudo/gen/CMakeLists.txt
    clang-tools-extra/pseudo/gen/Main.cpp
    clang-tools-extra/pseudo/include/CMakeLists.txt
    clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
    clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
    clang-tools-extra/pseudo/lib/cxx/CXX.cpp
    clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
    clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
    clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
    clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
    clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
    clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp

Modified: 
    clang-tools-extra/pseudo/CMakeLists.txt
    clang-tools-extra/pseudo/lib/CMakeLists.txt

Removed: 
    clang-tools-extra/pseudo/lib/Grammar.cpp
    clang-tools-extra/pseudo/lib/GrammarBNF.cpp
    clang-tools-extra/pseudo/lib/LRGraph.cpp
    clang-tools-extra/pseudo/lib/LRTable.cpp
    clang-tools-extra/pseudo/lib/LRTableBuild.cpp


################################################################################
diff  --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt
index 0891cc0a0f885..24bc1530bb7d6 100644
--- a/clang-tools-extra/pseudo/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/CMakeLists.txt
@@ -1,5 +1,7 @@
 include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
+add_subdirectory(include)
+add_subdirectory(gen)
 add_subdirectory(lib)
 add_subdirectory(tool)
 add_subdirectory(fuzzer)

diff  --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt
new file mode 100644
index 0000000000000..a104e05b53da9
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_clang_executable(pseudo-gen
+  Main.cpp
+  )
+
+target_link_libraries(pseudo-gen
+  PRIVATE
+  clangPseudoGrammar
+  )

diff  --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
new file mode 100644
index 0000000000000..535f863268df1
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -0,0 +1,89 @@
+//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a tool to compile a BNF grammar; it is used by the build system to
+// generate necessary data bits to statically construct core pieces (Grammar,
+// LRTable etc) of the LR parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Grammar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+using llvm::cl::values;
+
+namespace {
+enum EmitType {
+  EmitSymbolList,
+  EmitGrammarContent,
+};
+
+opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
+                         init(""));
+opt<EmitType>
+    Emit(desc("which information to emit:"),
+         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
+                           "Print nonterminal symbols (default)"),
+                clEnumValN(EmitGrammarContent, "emit-grammar-content",
+                           "Print the BNF grammar content as a string")));
+std::string readOrDie(llvm::StringRef Path) {
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+      llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code EC = Text.getError()) {
+    llvm::errs() << "Error: can't read grammar file '" << Path
+                 << "': " << EC.message() << "\n";
+    ::exit(1);
+  }
+  return Text.get()->getBuffer().str();
+}
+} // namespace
+
+int main(int argc, char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+  if (!Grammar.getNumOccurrences()) {
+    llvm::errs() << "Grammar file must be provided!\n";
+    return 1;
+  }
+
+  std::string GrammarText = readOrDie(Grammar);
+  std::vector<std::string> Diags;
+  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
+
+  if (!Diags.empty()) {
+    llvm::errs() << llvm::join(Diags, "\n");
+    return 1;
+  }
+  switch (Emit) {
+
+  case EmitSymbolList:
+    for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
+         ++ID) {
+      std::string Name = G->symbolName(ID).str();
+      // translation-unit -> translation_unit
+      std::replace(Name.begin(), Name.end(), '-', '_');
+      llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID));
+    }
+    break;
+  case EmitGrammarContent:
+    for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
+      llvm::outs() << '"';
+      llvm::outs().write_escaped((Line + "\n").str());
+      llvm::outs() << "\"\n";
+    }
+    break;
+  }
+
+  return 0;
+}

diff  --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt
new file mode 100644
index 0000000000000..e2a6f0efc0a32
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/CMakeLists.txt
@@ -0,0 +1,29 @@
+# The cxx.bnf grammar file
+set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf)
+
+# Generate inc files.
+set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc)
+add_custom_command(OUTPUT ${cxx_symbols_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-symbol-list
+     > ${cxx_symbols_inc}
+   COMMENT "Generating nonterminal symbol file for cxx grammar..."
+   DEPENDS pseudo-gen
+   VERBATIM)
+
+set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc)
+add_custom_command(OUTPUT ${cxx_bnf_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-grammar-content
+     > ${cxx_bnf_inc}
+   COMMENT "Generating bnf string file for cxx grammar..."
+   DEPENDS pseudo-gen
+   VERBATIM)
+
+# add_custom_command does not create a new target, we need to define a target
+# explicitly, so that other targets can depend on it.
+add_custom_target(cxx_gen
+    DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc}
+    VERBATIM)

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
new file mode 100644
index 0000000000000..edeeb636d83ed
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -0,0 +1,51 @@
+//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines public interfaces for the C++ grammar
+//  (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces
+//  of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar
+//  file at runtime.
+//
+//  We do a compilation of the C++ BNF grammar at build time, and generate
+//  critical data sources. The implementation of the interfaces is based on the
+//  generated data sources.
+//
+//  FIXME: not everything is fully compiled yet. The implementation of the
+//  interfaces is still parsing the grammar file at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CXX_CXX_H
+#define CLANG_PSEUDO_CXX_CXX_H
+
+#include "clang-pseudo/Grammar.h"
+
+namespace clang {
+namespace pseudo {
+class LRTable;
+
+namespace cxx {
+// Symbol represents nonterminal symbols in the C++ grammar.
+// It provides a simple uniform way to access a particular nonterminal.
+enum class Symbol : SymbolID {
+#define NONTERMINAL(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef NONTERMINAL
+};
+
+// Returns the C++ grammar.
+const Grammar &getGrammar();
+// Returns the corresponding LRTable for the C++ grammar.
+const LRTable &getLRTable();
+
+} // namespace cxx
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CXX_CXX_H

diff  --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
index 6dc8ed5b5e7a2..f312b10f7d8b4 100644
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,6 @@
+add_subdirectory(cxx)
+add_subdirectory(grammar)
+
 set(LLVM_LINK_COMPONENTS Support)
 
 add_clang_library(clangPseudo
@@ -5,15 +8,11 @@ add_clang_library(clangPseudo
   DirectiveTree.cpp
   Forest.cpp
   GLR.cpp
-  Grammar.cpp
-  GrammarBNF.cpp
   Lex.cpp
-  LRGraph.cpp
-  LRTable.cpp
-  LRTableBuild.cpp
   Token.cpp
 
   LINK_LIBS
   clangBasic
   clangLex
+  clangPseudoGrammar
   )

diff  --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
new file mode 100644
index 0000000000000..9e10f2ba5388e
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_clang_library(clangPseudoCXX
+  CXX.cpp
+
+  DEPENDS
+  cxx_gen
+
+  LINK_LIBS
+  clangPseudoGrammar
+  )

diff  --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
new file mode 100644
index 0000000000000..3d594b722f1ca
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -0,0 +1,34 @@
+//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+namespace cxx {
+
+static const char *CXXBNF =
+#include "CXXBNF.inc"
+    ;
+
+const Grammar &getGrammar() {
+  static std::vector<std::string> Diags;
+  static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release();
+  assert(Diags.empty());
+  return *G;
+}
+
+const LRTable &getLRTable() {
+  static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
+  return *Table;
+}
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang

diff  --git a/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
new file mode 100644
index 0000000000000..d50cb7df2a2cc
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
@@ -0,0 +1,18 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+# This library intends to keep its dependencies as minimal as possible: it is a
+# base library of the cxx generator, and this avoids creating long dep paths in
+# the build graph.
+add_clang_library(clangPseudoGrammar
+  Grammar.cpp
+  GrammarBNF.cpp
+  LRGraph.cpp
+  LRTable.cpp
+  LRTableBuild.cpp
+
+  # FIXME: can we get rid of the clangBasic dependency? We need it for the
+  # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions; we
+  # could consider reimplementing these functions.
+  LINK_LIBS
+  clangBasic
+  )

diff  --git a/clang-tools-extra/pseudo/lib/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
similarity index 100%
rename from clang-tools-extra/pseudo/lib/Grammar.cpp
rename to clang-tools-extra/pseudo/lib/grammar/Grammar.cpp

diff  --git a/clang-tools-extra/pseudo/lib/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
similarity index 100%
rename from clang-tools-extra/pseudo/lib/GrammarBNF.cpp
rename to clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp

diff  --git a/clang-tools-extra/pseudo/lib/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
similarity index 100%
rename from clang-tools-extra/pseudo/lib/LRGraph.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp

diff  --git a/clang-tools-extra/pseudo/lib/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
similarity index 100%
rename from clang-tools-extra/pseudo/lib/LRTable.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRTable.cpp

diff  --git a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
similarity index 100%
rename from clang-tools-extra/pseudo/lib/LRTableBuild.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp


        


More information about the cfe-commits mailing list