[clang-tools-extra] 1579090 - Reland "[pseudo] Split greatergreater token."
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 22 02:28:19 PDT 2022
Author: Haojian Wu
Date: 2022-03-22T10:27:52+01:00
New Revision: 1579090141c5cc061f3a0b62cd92bd93802ddcf7
URL: https://github.com/llvm/llvm-project/commit/1579090141c5cc061f3a0b62cd92bd93802ddcf7
DIFF: https://github.com/llvm/llvm-project/commit/1579090141c5cc061f3a0b62cd92bd93802ddcf7.diff
LOG: Reland "[pseudo] Split greatergreater token."
It was reverted because the test had a lifetime issue.
Reland f66d3758bda99e9f57bfdad168212feda18792ae with a fix.
Added:
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/Token.h
clang-tools-extra/pseudo/lib/Lex.cpp
clang-tools-extra/pseudo/lib/cxx.bnf
clang-tools-extra/pseudo/unittests/TokenTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
index 24b6729151e61..4563477b2c4fe 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
@@ -180,7 +180,8 @@ enum class LexFlags : uint8_t {
NeedsCleaning = 1 << 1,
};
-/// Derives a token stream by decoding escapes and interpreting raw_identifiers.
+/// Derives a token stream by decoding escapes, interpreting raw_identifiers and
+/// splitting the greatergreater token.
///
/// Tokens containing UCNs, escaped newlines, trigraphs etc are decoded and
/// their backing data is owned by the returned stream.
diff --git a/clang-tools-extra/pseudo/lib/Lex.cpp b/clang-tools-extra/pseudo/lib/Lex.cpp
index f5a239533c532..e99bf3a63e5e1 100644
--- a/clang-tools-extra/pseudo/lib/Lex.cpp
+++ b/clang-tools-extra/pseudo/lib/Lex.cpp
@@ -98,9 +98,21 @@ TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts) {
Tok.Length = Text.size();
Tok.Flags &= ~static_cast<decltype(Tok.Flags)>(LexFlags::NeedsCleaning);
}
- // Cook raw_identifiers into identifier, keyword, etc.
- if (Tok.Kind == tok::raw_identifier)
+
+ if (Tok.Kind == tok::raw_identifier) {
+ // Cook raw_identifiers into identifier, keyword, etc.
Tok.Kind = Identifiers.get(Tok.text()).getTokenID();
+ } else if (Tok.Kind == tok::greatergreater) {
+ // Split the greatergreater token.
+ // FIXME: split lessless token to support Cuda triple angle brackets <<<.
+ assert(Tok.text() == ">>");
+ Tok.Kind = tok::greater;
+ Tok.Length = 1;
+ Result.push(Tok);
+ // Line is wrong if the first greater is followed by an escaped newline!
+ Tok.Data = Tok.text().data() + 1;
+ }
+
Result.push(std::move(Tok));
}
diff --git a/clang-tools-extra/pseudo/lib/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx.bnf
index 48bf4621eefe5..cf664b8e13e55 100644
--- a/clang-tools-extra/pseudo/lib/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx.bnf
@@ -13,6 +13,9 @@
# - the file merely describes the core C++ grammar. Preprocessor directives and
# lexical conversions are omitted as we reuse clang's lexer and run a fake
# preprocessor;
+# - grammar rules with the >> token are adjusted, the greatergreater token is
+# split into two > tokens, to make the GLR parser aware of nested templates
+# and right shift operator;
#
# Guidelines:
# - nonterminals are lower_case; terminals (aka tokens) correspond to
@@ -96,7 +99,7 @@ fold-operator := %
fold-operator := ^
fold-operator := |
fold-operator := <<
-fold-operator := >>
+fold-operator := greatergreater
fold-operator := +=
fold-operator := -=
fold-operator := *=
@@ -202,7 +205,7 @@ additive-expression := additive-expression - multiplicative-expression
# expr.shift
shift-expression := additive-expression
shift-expression := shift-expression << additive-expression
-shift-expression := shift-expression >> additive-expression
+shift-expression := shift-expression greatergreater additive-expression
# expr.spaceship
compare-expression := shift-expression
compare-expression := compare-expression <=> shift-expression
@@ -615,7 +618,7 @@ operator-name := <=>
operator-name := ^^
operator-name := ||
operator-name := <<
-operator-name := >>
+operator-name := greatergreater
operator-name := <<=
operator-name := >>=
operator-name := ++
@@ -737,3 +740,8 @@ contextual-zero := NUMERIC_CONSTANT
module-keyword := IDENTIFIER
import-keyword := IDENTIFIER
export-keyword := IDENTIFIER
+
+#! greatergreater token -- clang lexer always lexes it as a single token, we
+#! split it into two tokens to make the GLR parser aware of the nested-template
+#! case.
+greatergreater := > >
diff --git a/clang-tools-extra/pseudo/unittests/TokenTest.cpp b/clang-tools-extra/pseudo/unittests/TokenTest.cpp
index 1357d23501193..8280a9b29341e 100644
--- a/clang-tools-extra/pseudo/unittests/TokenTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/TokenTest.cpp
@@ -171,6 +171,26 @@ no_indent \
}));
}
+TEST(TokenTest, SplitGreaterGreater) {
+ LangOptions Opts;
+ std::string Code = R"cpp(
+>> // split
+// >> with an escaped newline in the middle, split
+>\
+>
+>>= // not split
+)cpp";
+ TokenStream Cook = cook(lex(Code, Opts), Opts);
+ TokenStream Split = stripComments(Cook);
+ EXPECT_THAT(Split.tokens(), ElementsAreArray({
+ token(">", tok::greater),
+ token(">", tok::greater),
+ token(">", tok::greater),
+ token(">", tok::greater),
+ token(">>=", tok::greatergreaterequal),
+ }));
+}
+
TEST(TokenTest, DropComments) {
LangOptions Opts;
std::string Code = R"cpp(
More information about the cfe-commits mailing list