[clang] 9ca50e8 - [libTooling] Add parser for string representation of `RangeSelector`.

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 18 18:12:42 PDT 2020


Author: Yitzhak Mandelbaum
Date: 2020-06-19T01:11:29Z
New Revision: 9ca50e887db7f903c04a90593d2beed8a96794f1

URL: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1
DIFF: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1.diff

LOG: [libTooling] Add parser for string representation of `RangeSelector`.

This patch adds a parser for a `RangeSelector` written as a string. The format
is closely based on the way one would write the selector in C++. This should
enable use of `RangeSelector`s from tools like clang-query and web UIs.

Added: 
    clang/include/clang/Tooling/Transformer/Parsing.h
    clang/lib/Tooling/Transformer/Parsing.cpp

Modified: 
    clang/lib/Tooling/Transformer/CMakeLists.txt
    clang/unittests/Tooling/RangeSelectorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Transformer/Parsing.h b/clang/include/clang/Tooling/Transformer/Parsing.h
new file mode 100644
index 000000000000..8e51f595cd5b
--- /dev/null
+++ b/clang/include/clang/Tooling/Transformer/Parsing.h
@@ -0,0 +1,41 @@
+//===--- Parsing.h - Parsing library for Transformer ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+///  \file
+///  Defines parsing functions for Transformer types.
+///  FIXME: Currently, only supports `RangeSelectors` but parsers for other
+///  Transformer types are under development.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_
+#define LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "llvm/Support/Error.h"
+#include <functional>
+#include <string>
+
+namespace clang {
+namespace transformer {
+
+/// Parses a string representation of a \c RangeSelector. The grammar of these
+/// strings is closely based on the (sub)grammar of \c RangeSelectors as they'd
+/// appear in C++ code. However, this language constrains the set of permissible
+/// strings (for node ids) -- it does not support escapes in the
+/// string. Additionally, the \c charRange combinator is not supported, because
+/// there is no representation of values of type \c CharSourceRange in this
+/// (little) language.
+llvm::Expected<RangeSelector> parseRangeSelector(llvm::StringRef Input);
+
+} // namespace transformer
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_

diff  --git a/clang/lib/Tooling/Transformer/CMakeLists.txt b/clang/lib/Tooling/Transformer/CMakeLists.txt
index 281af1007a65..150b71b1ffcd 100644
--- a/clang/lib/Tooling/Transformer/CMakeLists.txt
+++ b/clang/lib/Tooling/Transformer/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
 )
 
 add_clang_library(clangTransformer
+  Parsing.cpp
   RangeSelector.cpp
   RewriteRule.cpp
   SourceCode.cpp

diff  --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
new file mode 100644
index 000000000000..1579115b9313
--- /dev/null
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -0,0 +1,279 @@
+//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Transformer/Parsing.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "clang/Tooling/Transformer/SourceCode.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace transformer;
+
+// FIXME: This implementation is entirely separate from that of the AST
+// matchers. Given the similarity of the languages and uses of the two parsers,
+// the two should share a common parsing infrastructure, as should other
+// Transformer types. We intend to unify this implementation soon to share as
+// much as possible with the AST Matchers parsing.
+
+namespace {
+using llvm::Error;
+using llvm::Expected;
+
+template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
+
+struct ParseState {
+  // The remaining input to be processed.
+  StringRef Input;
+  // The original input. Not modified during parsing; only for reference in
+  // error reporting.
+  StringRef OriginalInput;
+};
+
+// Represents an intermediate result returned by a parsing function. Functions
+// that don't generate values should use `llvm::None`.
+template <typename ResultType> struct ParseProgress {
+  ParseState State;
+  // Intermediate result generated by the Parser.
+  ResultType Value;
+};
+
+template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
+template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
+
+class ParseError : public llvm::ErrorInfo<ParseError> {
+public:
+  // Required field for all ErrorInfo derivatives.
+  static char ID;
+
+  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
+      : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
+        Excerpt(std::move(InputExcerpt)) {}
+
+  void log(llvm::raw_ostream &OS) const override {
+    OS << "parse error at position (" << Pos << "): " << ErrorMsg
+       << ": " + Excerpt;
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+
+  // Position of the error in the input string.
+  size_t Pos;
+  std::string ErrorMsg;
+  // Excerpt of the input starting at the error position.
+  std::string Excerpt;
+};
+
+char ParseError::ID;
+} // namespace
+
+static const llvm::StringMap<RangeSelectorOp<std::string>> &
+getUnaryStringSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
+      {"name", name},
+      {"node", node},
+      {"statement", statement},
+      {"statements", statements},
+      {"member", member},
+      {"callArgs", callArgs},
+      {"elseBranch", elseBranch},
+      {"initListElements", initListElements}};
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
+getUnaryRangeSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
+      {"before", before}, {"after", after}, {"expansion", expansion}};
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
+getBinaryStringSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
+      {"encloseNodes", range}};
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
+getBinaryRangeSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
+      M = {{"enclose", range}};
+  return M;
+}
+
+template <typename Element>
+llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
+                                     llvm::StringRef Key) {
+  auto it = Map.find(Key);
+  if (it == Map.end())
+    return llvm::None;
+  return it->second;
+}
+
+template <typename ResultType>
+ParseProgress<ResultType> makeParseProgress(ParseState State,
+                                            ResultType Result) {
+  return ParseProgress<ResultType>{State, std::move(Result)};
+}
+
+static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
+  size_t Pos = S.OriginalInput.size() - S.Input.size();
+  return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
+                                      S.OriginalInput.substr(Pos, 20).str());
+}
+
+// Returns a new ParseState that advances \c S by \c N characters.
+static ParseState advance(ParseState S, size_t N) {
+  S.Input = S.Input.drop_front(N);
+  return S;
+}
+
+static StringRef consumeWhitespace(StringRef S) {
+  return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
+}
+
+// Parses a single expected character \c c from \c State, skipping preceding
+// whitespace.  Error if the expected character isn't found.
+static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty() || State.Input.front() != c)
+    return makeParseError(State,
+                          ("expected char not found: " + llvm::Twine(c)).str());
+  return makeParseProgress(advance(State, 1), llvm::None);
+}
+
+// Parses an identifier "token" -- handles preceding whitespace.
+static ExpectedProgress<std::string> parseId(ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  auto Id = State.Input.take_while(
+      [](char c) { return c >= 0 && isIdentifierBody(c); });
+  if (Id.empty())
+    return makeParseError(State, "failed to parse name");
+  return makeParseProgress(advance(State, Id.size()), Id.str());
+}
+
+// For consistency with the AST matcher parser and C++ code, node ids are
+// written as strings. However, we do not support escaping in the string.
+static ExpectedProgress<std::string> parseStringId(ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty())
+    return makeParseError(State, "unexpected end of input");
+  if (!State.Input.consume_front("\""))
+    return makeParseError(
+        State,
+        "expecting string, but encountered other character or end of input");
+
+  StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
+  if (State.Input.size() == Id.size())
+    return makeParseError(State, "unterminated string");
+  // Advance past the trailing quote as well.
+  return makeParseProgress(advance(State, Id.size() + 1), Id.str());
+}
+
+// Parses a single element surrounded by parens. `Op` is applied to the parsed
+// result to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
+                                            RangeSelectorOp<T> Op,
+                                            ParseState State) {
+  auto P = parseChar('(', State);
+  if (!P)
+    return P.takeError();
+
+  auto E = ParseElement(P->State);
+  if (!E)
+    return E.takeError();
+
+  P = parseChar(')', E->State);
+  if (!P)
+    return P.takeError();
+
+  return makeParseProgress(P->State, Op(std::move(E->Value)));
+}
+
+// Parses a pair of elements surrounded by parens and separated by comma. `Op`
+// is applied to the parsed results to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
+                                          RangeSelectorOp<T, T> Op,
+                                          ParseState State) {
+  auto P = parseChar('(', State);
+  if (!P)
+    return P.takeError();
+
+  auto Left = ParseElement(P->State);
+  if (!Left)
+    return Left.takeError();
+
+  P = parseChar(',', Left->State);
+  if (!P)
+    return P.takeError();
+
+  auto Right = ParseElement(P->State);
+  if (!Right)
+    return Right.takeError();
+
+  P = parseChar(')', Right->State);
+  if (!P)
+    return P.takeError();
+
+  return makeParseProgress(P->State,
+                           Op(std::move(Left->Value), std::move(Right->Value)));
+}
+
+// Parses a range selector: an operator name followed by its parenthesized
+// argument(s), which are either string node ids or nested range selectors.
+// Returns the parsed `RangeSelector` on success and an error otherwise.
+static ExpectedProgress<RangeSelector>
+parseRangeSelectorImpl(ParseState State) {
+  auto Id = parseId(State);
+  if (!Id)
+    return Id.takeError();
+
+  std::string OpName = std::move(Id->Value);
+  if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
+    return parseSingle(parseStringId, *Op, Id->State);
+
+  if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
+    return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
+
+  if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
+    return parsePair(parseStringId, *Op, Id->State);
+
+  if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
+    return parsePair(parseRangeSelectorImpl, *Op, Id->State);
+
+  return makeParseError(State, "unknown selector name: " + OpName);
+}
+
+Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
+  ParseState State = {Input, Input};
+  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
+  if (!Result)
+    return Result.takeError();
+  State = Result->State;
+  // Discard any potentially trailing whitespace.
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty())
+    return Result->Value;
+  return makeParseError(State, "unexpected input after selector");
+}

diff  --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index c4560b6be2fc..da5b3c524e4b 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -10,6 +10,7 @@
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "clang/Tooling/Tooling.h"
+#include "clang/Tooling/Transformer/Parsing.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Testing/Support/Error.h"
@@ -132,13 +133,36 @@ TEST(RangeSelectorTest, BeforeOp) {
     int f(int x, int y, int z) { return 3; }
     int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
   )cc";
-  const char *Call = "call";
-  TestMatch Match = matchCode(Code, callExpr().bind(Call));
-  const auto* E = Match.Result.Nodes.getNodeAs<Expr>(Call);
+  StringRef CallID = "call";
+  ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID);
+  RangeSelector R = before(node(CallID.str()));
+
+  TestMatch Match = matchCode(Code, M);
+  const auto *E = Match.Result.Nodes.getNodeAs<Expr>(CallID);
   assert(E != nullptr);
   auto ExprBegin = E->getSourceRange().getBegin();
   EXPECT_THAT_EXPECTED(
-      before(node(Call))(Match.Result),
+      R(Match.Result),
+      HasValue(EqualsCharSourceRange(
+          CharSourceRange::getCharRange(ExprBegin, ExprBegin))));
+}
+
+TEST(RangeSelectorTest, BeforeOpParsed) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  StringRef CallID = "call";
+  ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID);
+  auto R = parseRangeSelector(R"rs(before(node("call")))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+
+  TestMatch Match = matchCode(Code, M);
+  const auto *E = Match.Result.Nodes.getNodeAs<Expr>(CallID);
+  assert(E != nullptr);
+  auto ExprBegin = E->getSourceRange().getBegin();
+  EXPECT_THAT_EXPECTED(
+      (*R)(Match.Result),
       HasValue(EqualsCharSourceRange(
           CharSourceRange::getCharRange(ExprBegin, ExprBegin))));
 }
@@ -169,45 +193,82 @@ TEST(RangeSelectorTest, AfterOp) {
                        HasValue(EqualsCharSourceRange(ExpectedAfter)));
 }
 
-TEST(RangeSelectorTest, RangeOp) {
+// Node-id specific version.
+TEST(RangeSelectorTest, RangeOpNodes) {
   StringRef Code = R"cc(
     int f(int x, int y, int z) { return 3; }
     int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
   )cc";
-  const char *Arg0 = "a0";
-  const char *Arg1 = "a1";
-  StringRef Call = "call";
-  auto Matcher = callExpr(hasArgument(0, expr().bind(Arg0)),
-                          hasArgument(1, expr().bind(Arg1)))
-                     .bind(Call);
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")));
+  RangeSelector R = range("a0", "a1");
+  TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+}
+
+TEST(RangeSelectorTest, RangeOpGeneral) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")));
+  RangeSelector R = range(node("a0"), node("a1"));
   TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+}
 
-  // Node-id specific version:
-  EXPECT_THAT_EXPECTED(select(range(Arg0, Arg1), Match), HasValue("3, 7"));
-  // General version:
-  EXPECT_THAT_EXPECTED(select(range(node(Arg0), node(Arg1)), Match),
-                       HasValue("3, 7"));
+TEST(RangeSelectorTest, RangeOpNodesParsed) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")));
+  auto R = parseRangeSelector(R"rs(encloseNodes("a0", "a1"))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+  TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
+}
+
+TEST(RangeSelectorTest, RangeOpGeneralParsed) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")));
+  auto R = parseRangeSelector(R"rs(enclose(node("a0"), node("a1")))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+  TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
 }
 
 TEST(RangeSelectorTest, NodeOpStatement) {
   StringRef Code = "int f() { return 3; }";
-  const char *ID = "id";
-  TestMatch Match = matchCode(Code, returnStmt().bind(ID));
-  EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("return 3;"));
+  TestMatch Match = matchCode(Code, returnStmt().bind("id"));
+  EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("return 3;"));
 }
 
 TEST(RangeSelectorTest, NodeOpExpression) {
   StringRef Code = "int f() { return 3; }";
-  const char *ID = "id";
-  TestMatch Match = matchCode(Code, expr().bind(ID));
-  EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("3"));
+  TestMatch Match = matchCode(Code, expr().bind("id"));
+  EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("3"));
 }
 
 TEST(RangeSelectorTest, StatementOp) {
   StringRef Code = "int f() { return 3; }";
-  const char *ID = "id";
-  TestMatch Match = matchCode(Code, expr().bind(ID));
-  EXPECT_THAT_EXPECTED(select(statement(ID), Match), HasValue("3;"));
+  TestMatch Match = matchCode(Code, expr().bind("id"));
+  RangeSelector R = statement("id");
+  EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3;"));
+}
+
+TEST(RangeSelectorTest, StatementOpParsed) {
+  StringRef Code = "int f() { return 3; }";
+  TestMatch Match = matchCode(Code, expr().bind("id"));
+  auto R = parseRangeSelector(R"rs(statement("id"))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+  EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3;"));
 }
 
 TEST(RangeSelectorTest, MemberOp) {


        


More information about the cfe-commits mailing list