[clang] 9ca50e8 - [libTooling] Add parser for string representation of `RangeSelector`.
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 18 18:12:42 PDT 2020
Author: Yitzhak Mandelbaum
Date: 2020-06-19T01:11:29Z
New Revision: 9ca50e887db7f903c04a90593d2beed8a96794f1
URL: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1
DIFF: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1.diff
LOG: [libTooling] Add parser for string representation of `RangeSelector`.
This patch adds a parser for a `RangeSelector` written as a string. The format
is closely based on the way one would write the selector in C++. This should
enable use of `RangeSelector`s from tools like clang-query and web UIs.
Added:
clang/include/clang/Tooling/Transformer/Parsing.h
clang/lib/Tooling/Transformer/Parsing.cpp
Modified:
clang/lib/Tooling/Transformer/CMakeLists.txt
clang/unittests/Tooling/RangeSelectorTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Transformer/Parsing.h b/clang/include/clang/Tooling/Transformer/Parsing.h
new file mode 100644
index 000000000000..8e51f595cd5b
--- /dev/null
+++ b/clang/include/clang/Tooling/Transformer/Parsing.h
@@ -0,0 +1,41 @@
+//===--- Parsing.h - Parsing library for Transformer ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines parsing functions for Transformer types.
+/// FIXME: Currently, only supports `RangeSelectors` but parsers for other
+/// Transformer types are under development.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_
+#define LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "llvm/Support/Error.h"
+#include <functional>
+#include <string>
+
+namespace clang {
+namespace transformer {
+
+/// Parses a string representation of a \c RangeSelector. The grammar of these
+/// strings is closely based on the (sub)grammar of \c RangeSelectors as they'd
+/// appear in C++ code. However, this language constrains the set of permissible
+/// strings (for node ids) -- it does not support escapes in the
+/// string. Additionally, the \c charRange combinator is not supported, because
+/// there is no representation of values of type \c CharSourceRange in this
+/// (little) language.
+llvm::Expected<RangeSelector> parseRangeSelector(llvm::StringRef Input);
+
+} // namespace transformer
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_
diff --git a/clang/lib/Tooling/Transformer/CMakeLists.txt b/clang/lib/Tooling/Transformer/CMakeLists.txt
index 281af1007a65..150b71b1ffcd 100644
--- a/clang/lib/Tooling/Transformer/CMakeLists.txt
+++ b/clang/lib/Tooling/Transformer/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
)
add_clang_library(clangTransformer
+ Parsing.cpp
RangeSelector.cpp
RewriteRule.cpp
SourceCode.cpp
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
new file mode 100644
index 000000000000..1579115b9313
--- /dev/null
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -0,0 +1,279 @@
+//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Transformer/Parsing.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "clang/Tooling/Transformer/SourceCode.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace transformer;
+
+// FIXME: This implementation is entirely separate from that of the AST
+// matchers. Given the similarity of the languages and uses of the two parsers,
+// the two should share a common parsing infrastructure, as should other
+// Transformer types. We intend to unify this implementation soon to share as
+// much as possible with the AST Matchers parsing.
+
+namespace {
+using llvm::Error;
+using llvm::Expected;
+
+template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
+
+struct ParseState {
+ // The remaining input to be processed.
+ StringRef Input;
+ // The original input. Not modified during parsing; only for reference in
+ // error reporting.
+ StringRef OriginalInput;
+};
+
+// Represents an intermediate result returned by a parsing function. Functions
+// that don't generate values should use `llvm::NoneType` (value `llvm::None`).
+template <typename ResultType> struct ParseProgress {
+ ParseState State;
+ // Intermediate result generated by the Parser.
+ ResultType Value;
+};
+
+template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
+template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
+
+class ParseError : public llvm::ErrorInfo<ParseError> {
+public:
+ // Required field for all ErrorInfo derivatives.
+ static char ID;
+
+ ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
+ : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
+ Excerpt(std::move(InputExcerpt)) {}
+
+ void log(llvm::raw_ostream &OS) const override {
+ OS << "parse error at position (" << Pos << "): " << ErrorMsg
+ << ": " + Excerpt;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+ // Position of the error in the input string.
+ size_t Pos;
+ std::string ErrorMsg;
+ // Excerpt of the input starting at the error position.
+ std::string Excerpt;
+};
+
+char ParseError::ID;
+} // namespace
+
+static const llvm::StringMap<RangeSelectorOp<std::string>> &
+getUnaryStringSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
+ {"name", name},
+ {"node", node},
+ {"statement", statement},
+ {"statements", statements},
+ {"member", member},
+ {"callArgs", callArgs},
+ {"elseBranch", elseBranch},
+ {"initListElements", initListElements}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
+getUnaryRangeSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
+ {"before", before}, {"after", after}, {"expansion", expansion}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
+getBinaryStringSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
+ {"encloseNodes", range}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
+getBinaryRangeSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
+ M = {{"enclose", range}};
+ return M;
+}
+
+template <typename Element>
+llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
+ llvm::StringRef Key) {
+ auto it = Map.find(Key);
+ if (it == Map.end())
+ return llvm::None;
+ return it->second;
+}
+
+template <typename ResultType>
+ParseProgress<ResultType> makeParseProgress(ParseState State,
+ ResultType Result) {
+ return ParseProgress<ResultType>{State, std::move(Result)};
+}
+
+static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
+ size_t Pos = S.OriginalInput.size() - S.Input.size();
+ return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
+ S.OriginalInput.substr(Pos, 20).str());
+}
+
+// Returns a new ParseState that advances \c S by \c N characters.
+static ParseState advance(ParseState S, size_t N) {
+ S.Input = S.Input.drop_front(N);
+ return S;
+}
+
+static StringRef consumeWhitespace(StringRef S) {
+ return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
+}
+
+// Parses a single expected character \c c from \c State, skipping preceding
+// whitespace. Error if the expected character isn't found.
+static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty() || State.Input.front() != c)
+ return makeParseError(State,
+ ("expected char not found: " + llvm::Twine(c)).str());
+ return makeParseProgress(advance(State, 1), llvm::None);
+}
+
+// Parses an identifier "token" -- handles preceding whitespace.
+static ExpectedProgress<std::string> parseId(ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ auto Id = State.Input.take_while(
+ [](char c) { return c >= 0 && isIdentifierBody(c); });
+ if (Id.empty())
+ return makeParseError(State, "failed to parse name");
+ return makeParseProgress(advance(State, Id.size()), Id.str());
+}
+
+// For consistency with the AST matcher parser and C++ code, node ids are
+// written as strings. However, we do not support escaping in the string.
+static ExpectedProgress<std::string> parseStringId(ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty())
+ return makeParseError(State, "unexpected end of input");
+ if (!State.Input.consume_front("\""))
+ return makeParseError(
+ State,
+ "expecting string, but encountered other character or end of input");
+
+ StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
+ if (State.Input.size() == Id.size())
+ return makeParseError(State, "unterminated string");
+ // Advance past the trailing quote as well.
+ return makeParseProgress(advance(State, Id.size() + 1), Id.str());
+}
+
+// Parses a single element surrounded by parens. `Op` is applied to the parsed
+// result to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
+ RangeSelectorOp<T> Op,
+ ParseState State) {
+ auto P = parseChar('(', State);
+ if (!P)
+ return P.takeError();
+
+ auto E = ParseElement(P->State);
+ if (!E)
+ return E.takeError();
+
+ P = parseChar(')', E->State);
+ if (!P)
+ return P.takeError();
+
+ return makeParseProgress(P->State, Op(std::move(E->Value)));
+}
+
+// Parses a pair of elements surrounded by parens and separated by comma. `Op`
+// is applied to the parsed results to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
+ RangeSelectorOp<T, T> Op,
+ ParseState State) {
+ auto P = parseChar('(', State);
+ if (!P)
+ return P.takeError();
+
+ auto Left = ParseElement(P->State);
+ if (!Left)
+ return Left.takeError();
+
+ P = parseChar(',', Left->State);
+ if (!P)
+ return P.takeError();
+
+ auto Right = ParseElement(P->State);
+ if (!Right)
+ return Right.takeError();
+
+ P = parseChar(')', Right->State);
+ if (!P)
+ return P.takeError();
+
+ return makeParseProgress(P->State,
+ Op(std::move(Left->Value), std::move(Right->Value)));
+}
+
+// Parses a selector expression: an operator name followed by a parenthesized
+// argument list (one or two node-id strings or nested selectors, per the
+// operator). Returns the resulting RangeSelector, or an error on failure.
+static ExpectedProgress<RangeSelector>
+parseRangeSelectorImpl(ParseState State) {
+ auto Id = parseId(State);
+ if (!Id)
+ return Id.takeError();
+
+ std::string OpName = std::move(Id->Value);
+ if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
+ return parseSingle(parseStringId, *Op, Id->State);
+
+ if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
+ return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
+
+ if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
+ return parsePair(parseStringId, *Op, Id->State);
+
+ if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
+ return parsePair(parseRangeSelectorImpl, *Op, Id->State);
+
+ return makeParseError(State, "unknown selector name: " + OpName);
+}
+
+Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
+ ParseState State = {Input, Input};
+ ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
+ if (!Result)
+ return Result.takeError();
+ State = Result->State;
+ // Discard any potentially trailing whitespace.
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty())
+ return Result->Value;
+ return makeParseError(State, "unexpected input after selector");
+}
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index c4560b6be2fc..da5b3c524e4b 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -10,6 +10,7 @@
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Tooling/Tooling.h"
+#include "clang/Tooling/Transformer/Parsing.h"
#include "clang/Tooling/Transformer/SourceCode.h"
#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Error.h"
@@ -132,13 +133,36 @@ TEST(RangeSelectorTest, BeforeOp) {
int f(int x, int y, int z) { return 3; }
int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
)cc";
- const char *Call = "call";
- TestMatch Match = matchCode(Code, callExpr().bind(Call));
- const auto* E = Match.Result.Nodes.getNodeAs<Expr>(Call);
+ StringRef CallID = "call";
+ ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID);
+ RangeSelector R = before(node(CallID.str()));
+
+ TestMatch Match = matchCode(Code, M);
+ const auto *E = Match.Result.Nodes.getNodeAs<Expr>(CallID);
assert(E != nullptr);
auto ExprBegin = E->getSourceRange().getBegin();
EXPECT_THAT_EXPECTED(
- before(node(Call))(Match.Result),
+ R(Match.Result),
+ HasValue(EqualsCharSourceRange(
+ CharSourceRange::getCharRange(ExprBegin, ExprBegin))));
+}
+
+TEST(RangeSelectorTest, BeforeOpParsed) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ StringRef CallID = "call";
+ ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID);
+ auto R = parseRangeSelector(R"rs(before(node("call")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+
+ TestMatch Match = matchCode(Code, M);
+ const auto *E = Match.Result.Nodes.getNodeAs<Expr>(CallID);
+ assert(E != nullptr);
+ auto ExprBegin = E->getSourceRange().getBegin();
+ EXPECT_THAT_EXPECTED(
+ (*R)(Match.Result),
HasValue(EqualsCharSourceRange(
CharSourceRange::getCharRange(ExprBegin, ExprBegin))));
}
@@ -169,45 +193,82 @@ TEST(RangeSelectorTest, AfterOp) {
HasValue(EqualsCharSourceRange(ExpectedAfter)));
}
-TEST(RangeSelectorTest, RangeOp) {
+// Node-id specific version.
+TEST(RangeSelectorTest, RangeOpNodes) {
StringRef Code = R"cc(
int f(int x, int y, int z) { return 3; }
int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
)cc";
- const char *Arg0 = "a0";
- const char *Arg1 = "a1";
- StringRef Call = "call";
- auto Matcher = callExpr(hasArgument(0, expr().bind(Arg0)),
- hasArgument(1, expr().bind(Arg1)))
- .bind(Call);
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")));
+ RangeSelector R = range("a0", "a1");
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+}
+
+TEST(RangeSelectorTest, RangeOpGeneral) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")));
+ RangeSelector R = range(node("a0"), node("a1"));
TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+}
- // Node-id specific version:
- EXPECT_THAT_EXPECTED(select(range(Arg0, Arg1), Match), HasValue("3, 7"));
- // General version:
- EXPECT_THAT_EXPECTED(select(range(node(Arg0), node(Arg1)), Match),
- HasValue("3, 7"));
+TEST(RangeSelectorTest, RangeOpNodesParsed) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")));
+ auto R = parseRangeSelector(R"rs(encloseNodes("a0", "a1"))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
+}
+
+TEST(RangeSelectorTest, RangeOpGeneralParsed) { // Parsed range(node, node).
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")));
+ auto R = parseRangeSelector(R"rs(enclose(node("a0"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); // Must parse before it is used.
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
}
TEST(RangeSelectorTest, NodeOpStatement) {
StringRef Code = "int f() { return 3; }";
- const char *ID = "id";
- TestMatch Match = matchCode(Code, returnStmt().bind(ID));
- EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("return 3;"));
+ TestMatch Match = matchCode(Code, returnStmt().bind("id"));
+ EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("return 3;"));
}
TEST(RangeSelectorTest, NodeOpExpression) {
StringRef Code = "int f() { return 3; }";
- const char *ID = "id";
- TestMatch Match = matchCode(Code, expr().bind(ID));
- EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("3"));
+ TestMatch Match = matchCode(Code, expr().bind("id"));
+ EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("3"));
}
TEST(RangeSelectorTest, StatementOp) {
StringRef Code = "int f() { return 3; }";
- const char *ID = "id";
- TestMatch Match = matchCode(Code, expr().bind(ID));
- EXPECT_THAT_EXPECTED(select(statement(ID), Match), HasValue("3;"));
+ TestMatch Match = matchCode(Code, expr().bind("id"));
+ RangeSelector R = statement("id");
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3;"));
+}
+
+TEST(RangeSelectorTest, StatementOpParsed) {
+ StringRef Code = "int f() { return 3; }";
+ TestMatch Match = matchCode(Code, expr().bind("id"));
+ auto R = parseRangeSelector(R"rs(statement("id"))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3;"));
}
TEST(RangeSelectorTest, MemberOp) {
More information about the cfe-commits
mailing list