[clang-tools-extra] 7dc3c61 - [pseudo] Strip directives from a token stream
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Fri May 6 03:15:15 PDT 2022
Author: Sam McCall
Date: 2022-05-06T12:15:08+02:00
New Revision: 7dc3c6190ec7191dd104fa5158fe0ee32e9b0c49
URL: https://github.com/llvm/llvm-project/commit/7dc3c6190ec7191dd104fa5158fe0ee32e9b0c49
DIFF: https://github.com/llvm/llvm-project/commit/7dc3c6190ec7191dd104fa5158fe0ee32e9b0c49.diff
LOG: [pseudo] Strip directives from a token stream
This includes only the taken branch of conditional sections.
The API allows for producing a stream for a particular PP branch, which
will be used later for the secondary GLR parses of not-taken branches.
Differential Revision: https://reviews.llvm.org/D123243
Added:
clang-tools-extra/pseudo/test/strip-directives.c
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
clang-tools-extra/pseudo/lib/DirectiveTree.cpp
clang-tools-extra/pseudo/test/lex.c
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h b/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
index 5cd41efcb2ace..e8220537649f9 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
@@ -92,7 +92,11 @@ struct DirectiveTree {
/// Extract preprocessor structure by examining the raw tokens.
static DirectiveTree parse(const TokenStream &);
- // FIXME: allow deriving a preprocessed stream
+ /// Produce a parseable token stream by stripping all directive tokens.
+ ///
+ /// Conditional sections are replaced by the taken branch, if any.
+ /// This tree must describe the provided token stream.
+ TokenStream stripDirectives(const TokenStream &) const;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Chunk &);
diff --git a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
index f464e3bc3ba66..82843125329b8 100644
--- a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
+++ b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
@@ -347,5 +347,53 @@ void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
BranchChooser{Code}.choose(Tree);
}
+namespace {
+class Preprocessor {
+ const TokenStream &In;
+ TokenStream &Out;
+
+public:
+ Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
+ ~Preprocessor() { Out.finalize(); }
+
+ void walk(const DirectiveTree &T) {
+ for (const auto &C : T.Chunks)
+ walk(C);
+ }
+
+ void walk(const DirectiveTree::Chunk &C) {
+ switch (C.kind()) {
+ case DirectiveTree::Chunk::K_Code:
+ return walk((const DirectiveTree::Code &)C);
+ case DirectiveTree::Chunk::K_Directive:
+ return walk((const DirectiveTree::Directive &)C);
+ case DirectiveTree::Chunk::K_Conditional:
+ return walk((const DirectiveTree::Conditional &)C);
+ case DirectiveTree::Chunk::K_Empty:
+ break;
+ }
+ llvm_unreachable("bad chunk kind");
+ }
+
+ void walk(const DirectiveTree::Code &C) {
+ for (const auto &Tok : In.tokens(C.Tokens))
+ Out.push(Tok);
+ }
+
+ void walk(const DirectiveTree::Directive &) {}
+
+ void walk(const DirectiveTree::Conditional &C) {
+ if (C.Taken)
+ walk(C.Branches[*C.Taken].second);
+ }
+};
+} // namespace
+
+TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
+ TokenStream Out;
+ Preprocessor(In, Out).walk(*this);
+ return Out;
+}
+
} // namespace pseudo
} // namespace clang
diff --git a/clang-tools-extra/pseudo/test/lex.c b/clang-tools-extra/pseudo/test/lex.c
index c1bf9296a8cce..ebebd2e0fb72f 100644
--- a/clang-tools-extra/pseudo/test/lex.c
+++ b/clang-tools-extra/pseudo/test/lex.c
@@ -18,7 +18,7 @@ SOURCE-NEXT: #end
SOURCE-NEXT: }
RUN: clang-pseudo -source %s -print-tokens | FileCheck %s -check-prefix=TOKEN
- TOKEN: 0: raw_identifier 0:0 "int" flags=1
+ TOKEN: 0: raw_identifier 0:0 "int" flags=1
TOKEN-NEXT: raw_identifier 0:0 "is_debug"
TOKEN-NEXT: l_paren 0:0 "("
TOKEN-NEXT: r_paren 0:0 ")"
@@ -39,14 +39,4 @@ TOKEN-NEXT: hash 5:0 "#" flags=1
TOKEN-NEXT: raw_identifier 5:0 "endif"
TOKEN-NEXT: r_brace 6:0 "}" flags=1
-RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
- PPT: code (5 tokens)
-PPT-NEXT: #ifndef (3 tokens) TAKEN
-PPT-NEXT: code (4 tokens)
-PPT-NEXT: #else (2 tokens)
-PPT-NEXT: code (3 tokens)
-PPT-NEXT: #endif (2 tokens)
-PPT-NEXT: code (2 tokens)
- ^ including this block comment
-
*******************************************************************************/
diff --git a/clang-tools-extra/pseudo/test/strip-directives.c b/clang-tools-extra/pseudo/test/strip-directives.c
new file mode 100644
index 0000000000000..c7878d9295a08
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/strip-directives.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+int main() {
+#error This was inevitable...
+#if HELLO
+ printf("hello, world\n");
+ return 0;
+#else
+ abort();
+#endif
+}
+
+/* This comment gets lexed along with the input above! We just don't CHECK it.
+
+RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
+ PPT: #include (7 tokens)
+PPT-NEXT: code (5 tokens)
+PPT-NEXT: #error (6 tokens)
+PPT-NEXT: #if (3 tokens) TAKEN
+PPT-NEXT: code (8 tokens)
+PPT-NEXT: #else (2 tokens)
+PPT-NEXT: code (4 tokens)
+PPT-NEXT: #endif (2 tokens)
+PPT-NEXT: code (2 tokens)
+ ^ including this block comment
+
+RUN: clang-pseudo -source %s -strip-directives -print-source | FileCheck %s --strict-whitespace
+ CHECK: int main() {
+CHECK-NEXT: printf("hello, world\n");
+CHECK-NEXT: return 0;
+CHECK-NEXT: }
+
+RUN: clang-pseudo -source %s -strip-directives -print-tokens | FileCheck %s --check-prefix=TOKEN
+ TOKEN: 0: raw_identifier 1:0 "int" flags=1
+TOKEN-NEXT: raw_identifier 1:0 "main"
+TOKEN-NEXT: l_paren 1:0 "("
+TOKEN-NEXT: r_paren 1:0 ")"
+TOKEN-NEXT: l_brace 1:0 "{"
+TOKEN-NEXT: raw_identifier 4:2 "printf" flags=1
+TOKEN-NEXT: l_paren 4:2 "("
+TOKEN-NEXT: string_literal 4:2 "\22hello, world\\n\22"
+TOKEN-NEXT: r_paren 4:2 ")"
+TOKEN-NEXT: semi 4:2 ";"
+TOKEN-NEXT: raw_identifier 5:2 "return" flags=1
+TOKEN-NEXT: numeric_constant 5:2 "0"
+TOKEN-NEXT: semi 5:2 ";"
+TOKEN-NEXT: r_brace 9:0 "}" flags=1
+
+*******************************************************************************/
+
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index d8517262faf1f..47a0b2a10ae7e 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/Signals.h"
using clang::pseudo::Grammar;
+using clang::pseudo::TokenStream;
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
@@ -37,6 +38,9 @@ static opt<bool> PrintTokens("print-tokens", desc("Print detailed token info"));
static opt<bool>
PrintDirectiveTree("print-directive-tree",
desc("Print directive structure of source code"));
+static opt<bool>
+ StripDirectives("strip-directives",
+ desc("Strip directives and select conditional sections"));
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
@@ -58,22 +62,30 @@ int main(int argc, char *argv[]) {
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
std::string SourceText;
llvm::Optional<clang::pseudo::TokenStream> RawStream;
- llvm::Optional<clang::pseudo::DirectiveTree> DirectiveStructure;
+ llvm::Optional<TokenStream> PreprocessedStream;
llvm::Optional<clang::pseudo::TokenStream> ParseableStream;
if (Source.getNumOccurrences()) {
SourceText = readOrDie(Source);
RawStream = clang::pseudo::lex(SourceText, LangOpts);
- DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
- clang::pseudo::chooseConditionalBranches(*DirectiveStructure, *RawStream);
+ TokenStream *Stream = RawStream.getPointer();
+
+ auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
+ clang::pseudo::chooseConditionalBranches(DirectiveStructure, *RawStream);
+
+ llvm::Optional<TokenStream> Preprocessed;
+ if (StripDirectives) {
+ Preprocessed = DirectiveStructure.stripDirectives(*Stream);
+ Stream = Preprocessed.getPointer();
+ }
- if (PrintDirectiveTree)
- llvm::outs() << DirectiveStructure;
if (PrintSource)
- RawStream->print(llvm::outs());
+ Stream->print(llvm::outs());
if (PrintTokens)
- llvm::outs() << RawStream;
+ llvm::outs() << *Stream;
+ if (PrintDirectiveTree)
+ llvm::outs() << DirectiveStructure;
- ParseableStream = clang::pseudo::stripComments(cook(*RawStream, LangOpts));
+ ParseableStream = clang::pseudo::stripComments(cook(*Stream, LangOpts));
}
if (Grammar.getNumOccurrences()) {
diff --git a/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp b/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
index 476726c7fedc1..f8732e28c5e1f 100644
--- a/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
@@ -27,14 +27,23 @@ using testing::Pair;
using testing::StrEq;
using Chunk = DirectiveTree::Chunk;
-MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+// Matches text of a list of tokens against a string (joined with spaces).
+// e.g. EXPECT_THAT(Stream.tokens(), tokens("int main ( ) { }"));
+MATCHER_P(tokens, Tokens, "") {
std::vector<llvm::StringRef> Texts;
- for (const Token &Tok : TS.tokens(arg.Tokens))
+ for (const Token &Tok : arg)
Texts.push_back(Tok.text());
return Matcher<std::string>(StrEq(Tokens))
.MatchAndExplain(llvm::join(Texts, " "), result_listener);
}
+// Matches tokens covered by a directive chunk (with a Tokens property) against
+// a string, similar to tokens() above.
+// e.g. EXPECT_THAT(SomeDirective, tokensAre(Stream, "# include < vector >"));
+MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+ return testing::Matches(tokens(Tokens))(TS.tokens(arg.Tokens));
+}
+
MATCHER_P(chunkKind, K, "") { return arg.kind() == K; }
TEST(DirectiveTree, Parse) {
@@ -301,6 +310,45 @@ TEST(DirectiveTree, ChooseBranches) {
}
}
+TEST(DirectiveTree, StripDirectives) {
+ LangOptions Opts;
+ std::string Code = R"cpp(
+ #include <stddef.h>
+ a a a
+ #warning AAA
+ b b b
+ #if 1
+ c c c
+ #warning BBB
+ #if 0
+ d d d
+ #warning CC
+ #else
+ e e e
+ #endif
+ f f f
+ #if 0
+ g g g
+ #endif
+ h h h
+ #else
+ i i i
+ #endif
+ j j j
+ )cpp";
+ TokenStream S = lex(Code, Opts);
+
+ DirectiveTree Tree = DirectiveTree::parse(S);
+ chooseConditionalBranches(Tree, S);
+ EXPECT_THAT(Tree.stripDirectives(S).tokens(),
+ tokens("a a a b b b c c c e e e f f f h h h j j j"));
+
+ const DirectiveTree &Part =
+ ((const DirectiveTree::Conditional &)Tree.Chunks[4]).Branches[0].second;
+ EXPECT_THAT(Part.stripDirectives(S).tokens(),
+ tokens("c c c e e e f f f h h h"));
+}
+
} // namespace
} // namespace pseudo
} // namespace clang
More information about the cfe-commits mailing list