[clang] Inject tokens containing #embed back into token stream (PR #97274)
Mariya Podchishchaeva via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 1 03:06:12 PDT 2024
https://github.com/Fznamznon created https://github.com/llvm/llvm-project/pull/97274
Instead of playing "whack a mole" with places where #embed should be expanded as a comma-separated list, just inject each byte as a token back into the stream, separated by commas.
From 4d5008fcf3ac37fa213c8f2cf42c3cce6369c83d Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Thu, 20 Jun 2024 06:04:07 -0700
Subject: [PATCH] [clang] Inject tokens containing #embed back into token
stream
Instead of playing "whack a mole" with places where #embed should be
expanded as a comma-separated list, just inject each byte as a token back
into the stream, separated by commas.
---
clang/include/clang/Basic/TokenKinds.def | 3 ++
clang/include/clang/Basic/TokenKinds.h | 2 +-
clang/include/clang/Lex/Preprocessor.h | 5 +-
clang/include/clang/Parse/Parser.h | 3 +-
clang/lib/Parse/ParseExpr.cpp | 53 ++++++++++-----------
clang/lib/Parse/ParseTemplate.cpp | 41 +++++-----------
clang/lib/Sema/SemaExpr.cpp | 6 ++-
clang/test/Preprocessor/embed_codegen.cpp | 3 +-
clang/test/Preprocessor/embed_constexpr.cpp | 3 +-
clang/test/Preprocessor/embed_weird.cpp | 21 ++++----
10 files changed, 63 insertions(+), 77 deletions(-)
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 37d570ca5e75b..1bc9c59576f33 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -165,6 +165,9 @@ TOK(raw_identifier) // Used only in raw lexing mode.
// C99 6.4.4.2: Floating Constants
TOK(numeric_constant) // 0x123
+// Directly holds numerical value. Used to process C23 #embed.
+TOK(binary_data)
+
// C99 6.4.4: Character Constants
TOK(char_constant) // 'a'
TOK(wide_char_constant) // L'b'
diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h
index e5183a27d2bc5..1b133dde89587 100644
--- a/clang/include/clang/Basic/TokenKinds.h
+++ b/clang/include/clang/Basic/TokenKinds.h
@@ -98,7 +98,7 @@ inline bool isLiteral(TokenKind K) {
return K == tok::numeric_constant || K == tok::char_constant ||
K == tok::wide_char_constant || K == tok::utf8_char_constant ||
K == tok::utf16_char_constant || K == tok::utf32_char_constant ||
- isStringLiteral(K) || K == tok::header_name;
+ isStringLiteral(K) || K == tok::header_name || K == tok::binary_data;
}
/// Return true if this is any of tok::annot_* kinds.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index be3334b980746..8e30756da2a01 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2123,8 +2123,9 @@ class Preprocessor {
char
getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
bool *Invalid = nullptr) const {
- assert(Tok.is(tok::numeric_constant) &&
- Tok.getLength() == 1 && "Called on unsupported token");
+ assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
+ Tok.getLength() == 1 &&
+ "Called on unsupported token");
assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
// If the token is carrying a literal data pointer, just use it.
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 6880fa4bb0b03..7bc2280764c5b 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -2123,7 +2123,7 @@ class Parser : public CodeCompletionHandler {
};
ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo);
ExprResult createEmbedExpr();
- void ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs);
+ void injectEmbedTokens();
//===--------------------------------------------------------------------===//
// clang Expressions
@@ -3830,7 +3830,6 @@ class Parser : public CodeCompletionHandler {
AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS,
ImplicitTypenameContext AllowImplicitTypename,
bool IsClassName = false);
- void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs);
bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
TemplateTy Template, SourceLocation OpenLoc);
ParsedTemplateArgument ParseTemplateTemplateArgument();
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 9fc3cd73f73a0..a3b800a35b55e 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1018,6 +1018,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
// primary-expression
case tok::numeric_constant:
+ case tok::binary_data:
// constant: integer-constant
// constant: floating-constant
@@ -1067,18 +1068,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
}
case tok::annot_embed: {
- // We've met #embed in a context where a single value is expected. Take last
- // element from #embed data as if it were a comma expression.
- EmbedAnnotationData *Data =
- reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
- SourceLocation StartLoc = ConsumeAnnotationToken();
- ASTContext &Context = Actions.getASTContext();
- Res = IntegerLiteral::Create(Context,
- llvm::APInt(CHAR_BIT, Data->BinaryData.back()),
- Context.UnsignedCharTy, StartLoc);
- if (Data->BinaryData.size() > 1)
- Diag(StartLoc, diag::warn_unused_comma_left_operand);
- break;
+ injectEmbedTokens();
+ return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast,
+ isVectorLiteral, NotPrimaryExpression);
}
case tok::kw___super:
@@ -3578,15 +3570,29 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS,
T.getCloseLocation());
}
-void Parser::ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs) {
+void Parser::injectEmbedTokens() {
EmbedAnnotationData *Data =
reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
- SourceLocation StartLoc = ConsumeAnnotationToken();
- ASTContext &Context = Actions.getASTContext();
- for (auto Byte : Data->BinaryData) {
- Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
- Context.UnsignedCharTy, StartLoc));
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(Data->BinaryData.size() * 2 - 1),
+ Data->BinaryData.size() * 2 - 1);
+ unsigned I = 0;
+ for (auto &Byte : Data->BinaryData) {
+ Toks[I].startToken();
+ Toks[I].setKind(tok::binary_data);
+ Toks[I].setLocation(Tok.getLocation());
+ Toks[I].setLength(1);
+ Toks[I].setLiteralData(&Byte);
+ if (I != ((Data->BinaryData.size() - 1) * 2)) {
+ Toks[I + 1].startToken();
+ Toks[I + 1].setKind(tok::comma);
+ Toks[I + 1].setLocation(Tok.getLocation());
+ }
+ I += 2;
}
+ PP.EnterTokenStream(std::move(Toks), /*DisableMacroExpansion=*/true,
+ /*IsReinject=*/false);
+ ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
}
/// ParseExpressionList - Used for C/C++ (argument-)expression-list.
@@ -3624,17 +3630,8 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs,
if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
Expr = ParseBraceInitializer();
- } else if (Tok.is(tok::annot_embed)) {
- ExpandEmbedDirective(Exprs);
- if (Tok.isNot(tok::comma))
- break;
- Token Comma = Tok;
- ConsumeToken();
- checkPotentialAngleBracketDelimiter(Comma);
- continue;
- } else {
+ } else
Expr = ParseAssignmentExpression();
- }
if (EarlyTypoCorrection)
Expr = Actions.CorrectDelayedTyposInExpr(Expr);
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 7e30afa2c64a4..a5130f56600e5 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1523,19 +1523,6 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
ExprArg.get(), Loc);
}
-void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) {
- EmbedAnnotationData *Data =
- reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
- SourceLocation StartLoc = ConsumeAnnotationToken();
- ASTContext &Context = Actions.getASTContext();
- for (auto Byte : Data->BinaryData) {
- Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
- Context.UnsignedCharTy, StartLoc);
- TemplateArgs.push_back(
- ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc));
- }
-}
-
/// ParseTemplateArgumentList - Parse a C++ template-argument-list
/// (C++ [temp.names]). Returns true if there was an error.
///
@@ -1560,24 +1547,20 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
do {
PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp);
- if (Tok.is(tok::annot_embed)) {
- ExpandEmbedIntoTemplateArgList(TemplateArgs);
- } else {
- ParsedTemplateArgument Arg = ParseTemplateArgument();
- SourceLocation EllipsisLoc;
- if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
- Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
-
- if (Arg.isInvalid()) {
- if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
- RunSignatureHelp();
- return true;
- }
-
- // Save this template argument.
- TemplateArgs.push_back(Arg);
+ ParsedTemplateArgument Arg = ParseTemplateArgument();
+ SourceLocation EllipsisLoc;
+ if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
+ Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
+
+ if (Arg.isInvalid()) {
+ if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
+ RunSignatureHelp();
+ return true;
}
+ // Save this template argument.
+ TemplateArgs.push_back(Arg);
+
// If the next token is a comma, consume it and keep reading
// arguments.
} while (TryConsumeToken(tok::comma));
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index db44cfe1288b6..c5657b2389cd2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3722,9 +3722,11 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) {
ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
// Fast path for a single digit (which is quite common). A single digit
// cannot have a trigraph, escaped newline, radix prefix, or suffix.
- if (Tok.getLength() == 1) {
+ if (Tok.getLength() == 1 || Tok.getKind() == tok::binary_data) {
const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok);
- return ActOnIntegerConstant(Tok.getLocation(), Val-'0');
+ return ActOnIntegerConstant(
+ Tok.getLocation(),
+ (Tok.getKind() == tok::binary_data) ? Val : Val - '0');
}
SmallString<128> SpellingBuffer;
diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp
index 64110afc162d7..201bf300bc669 100644
--- a/clang/test/Preprocessor/embed_codegen.cpp
+++ b/clang/test/Preprocessor/embed_codegen.cpp
@@ -43,8 +43,9 @@ a
};
// CHECK: store i32 107, ptr %b, align 4
-int b =
+int b = (
#embed<jk.txt>
+ )
;
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
index 1cadff76b4890..a7857641a2e8d 100644
--- a/clang/test/Preprocessor/embed_constexpr.cpp
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions
+// expected-no-diagnostics
constexpr int value(int a, int b) {
return a + b;
@@ -46,7 +47,7 @@ int array[
static_assert(sizeof(array) / sizeof(int) == 'j');
constexpr int comma_expr = (
-#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+#embed <jk.txt>
);
static_assert(comma_expr == 'k');
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
index 31b622c848d6a..cc73a88e5a657 100644
--- a/clang/test/Preprocessor/embed_weird.cpp
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -27,7 +27,7 @@ _Static_assert(
_Static_assert(sizeof(
#embed <single_byte.txt>
) ==
-sizeof(unsigned char)
+sizeof(int)
, ""
);
_Static_assert(sizeof
@@ -35,9 +35,9 @@ _Static_assert(sizeof
, ""
);
_Static_assert(sizeof(
-#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+#embed <jk.txt>
) ==
-sizeof(unsigned char)
+sizeof(int)
, ""
);
@@ -73,10 +73,10 @@ void do_stuff() {
// Ensure that we don't accidentally allow you to initialize an unsigned char *
// from embedded data; the data is modeled as a string literal internally, but
// is not actually a string literal.
-const unsigned char *ptr =
+const unsigned char *ptr = (
#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
-; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \
- cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}}
+ ); // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'int'}} \
+ cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'int'}}
// However, there are some cases where this is fine and should work.
const unsigned char *null_ptr_1 =
@@ -101,11 +101,10 @@ constexpr unsigned char ch =
;
static_assert(ch == 0);
-void foobar(float x, char y, char z); // cxx-note {{candidate function not viable: requires 3 arguments, but 1 was provided}}
- // c-note@-1 {{declared here}}
-void g1() { foobar((float) // cxx-error {{no matching function for call to 'foobar'}}
-#embed "numbers.txt" limit(3) // expected-warning {{left operand of comma operator has no effect}}
-); // c-error {{too few arguments to function call, expected 3, have 1}}
+void foobar(float x, char y, char z);
+void g1() { foobar((float)
+#embed "numbers.txt" limit(3)
+);
}
#if __cplusplus
More information about the cfe-commits
mailing list