[flang-commits] [clang-tools-extra] [lldb] [compiler-rt] [llvm] [libc] [libcxx] [flang] [clang] ✨ [Sema, Lex, Parse] Preprocessor embed in C and C++ (and Obj-C and Obj-C++ by-proxy) (PR #68620)
Aaron Ballman via flang-commits
flang-commits at lists.llvm.org
Mon Nov 13 10:25:07 PST 2023
https://github.com/AaronBallman updated https://github.com/llvm/llvm-project/pull/68620
>From 7050c932f63f9cb9e94636b287887f8241083117 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Thu, 28 Sep 2023 18:31:34 -0400
Subject: [PATCH 01/29] =?UTF-8?q?=E2=9C=A8=20[Sema,=20Driver,=20Lex,=20Fro?=
=?UTF-8?q?ntend]=20Implement=20naive=20#embed=20for=20C23=20and=20C++26.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
🛠 [Frontend] Ensure commas inserted by #embed are properly serialized to output
---
clang/CMakeLists.txt | 3 +-
clang/include/clang/Basic/Builtins.def | 3 +
clang/include/clang/Basic/DiagnosticGroups.td | 6 +
.../include/clang/Basic/DiagnosticLexKinds.td | 24 +-
clang/include/clang/Basic/FileManager.h | 8 +-
clang/include/clang/Basic/TokenKinds.def | 7 +
clang/include/clang/Driver/Options.td | 16 +
.../Frontend/PreprocessorOutputOptions.h | 2 +
clang/include/clang/Lex/PPCallbacks.h | 77 ++-
clang/include/clang/Lex/Preprocessor.h | 66 ++-
clang/include/clang/Lex/PreprocessorOptions.h | 7 +
clang/lib/Basic/FileManager.cpp | 8 +-
clang/lib/Basic/IdentifierTable.cpp | 3 +-
clang/lib/Driver/ToolChains/Clang.cpp | 5 +-
clang/lib/Format/FormatToken.h | 2 +
clang/lib/Format/TokenAnnotator.cpp | 28 +
clang/lib/Frontend/CompilerInvocation.cpp | 19 +
clang/lib/Frontend/DependencyFile.cpp | 29 +
clang/lib/Frontend/DependencyGraph.cpp | 43 +-
clang/lib/Frontend/InitPreprocessor.cpp | 7 +
.../lib/Frontend/PrintPreprocessedOutput.cpp | 25 +-
.../Frontend/Rewrite/InclusionRewriter.cpp | 13 +
clang/lib/Lex/PPCallbacks.cpp | 11 -
clang/lib/Lex/PPDirectives.cpp | 500 ++++++++++++++++++
clang/lib/Lex/PPExpressions.cpp | 44 +-
clang/lib/Lex/PPMacroExpansion.cpp | 120 +++++
clang/test/Preprocessor/Inputs/jk.txt | 1 +
clang/test/Preprocessor/Inputs/media/art.txt | 9 +
clang/test/Preprocessor/Inputs/media/empty | 0
.../test/Preprocessor/Inputs/single_byte.txt | 1 +
clang/test/Preprocessor/embed___has_embed.c | 34 ++
.../embed___has_embed_supported.c | 24 +
.../test/Preprocessor/embed_feature_test.cpp | 13 +
.../test/Preprocessor/embed_file_not_found.c | 4 +
clang/test/Preprocessor/embed_init.c | 28 +
.../Preprocessor/embed_parameter_if_empty.c | 16 +
.../test/Preprocessor/embed_parameter_limit.c | 15 +
.../Preprocessor/embed_parameter_offset.c | 15 +
.../Preprocessor/embed_parameter_prefix.c | 15 +
.../Preprocessor/embed_parameter_suffix.c | 15 +
.../embed_parameter_unrecognized.c | 8 +
clang/test/Preprocessor/embed_path_chevron.c | 8 +
clang/test/Preprocessor/embed_path_quote.c | 8 +
clang/test/Preprocessor/single_byte.txt | 1 +
llvm/CMakeLists.txt | 7 +
llvm/cmake/modules/GetHostTriple.cmake | 6 +-
46 files changed, 1264 insertions(+), 40 deletions(-)
create mode 100644 clang/test/Preprocessor/Inputs/jk.txt
create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt
create mode 100644 clang/test/Preprocessor/Inputs/media/empty
create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt
create mode 100644 clang/test/Preprocessor/embed___has_embed.c
create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c
create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp
create mode 100644 clang/test/Preprocessor/embed_file_not_found.c
create mode 100644 clang/test/Preprocessor/embed_init.c
create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c
create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c
create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c
create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c
create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c
create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c
create mode 100644 clang/test/Preprocessor/embed_path_chevron.c
create mode 100644 clang/test/Preprocessor/embed_path_quote.c
create mode 100644 clang/test/Preprocessor/single_byte.txt
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 9b52c58be41e7f7..1b88905da3b8597 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,6 +300,7 @@ configure_file(
${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
# Add appropriate flags for GCC
+option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -307,7 +308,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
endif ()
# Enable -pedantic for Clang even if it's not enabled for LLVM.
- if (NOT LLVM_ENABLE_PEDANTIC)
+ if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
endif ()
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 6ea8484606cfd5d..0dfc6456daf059a 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1766,6 +1766,9 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
// Arithmetic Fence: to prevent FP reordering and reassociation optimizations
LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
+// preprocessor embed builtin
+LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
+
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 0b09c002191848a..89f6715cebfdc0d 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -708,6 +708,12 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">;
def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>;
def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
def FinalMacro : DiagGroup<"final-macro">;
+// Warnings about unknown preprocessor parameters (e.g. `#embed` and extensions)
+def UnsupportedDirective : DiagGroup<"unsupported-directive">;
+def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">;
+def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">;
+def DirectiveParameters : DiagGroup<"directive-parameters",
+ [UnknownDirectiveParameters, IgnoredDirectiveParameters]>;
// Just silence warnings about -Wstrict-aliasing for now.
def : DiagGroup<"strict-aliasing=0">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 940cca67368492f..4490f40806b0345 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -422,6 +422,22 @@ def warn_cxx23_compat_warning_directive : Warning<
def warn_c23_compat_warning_directive : Warning<
"#warning is incompatible with C standards before C23">,
InGroup<CPre23Compat>, DefaultIgnore;
+// Compatibility warnings for the #embed directive and for the __has_embed
+// preprocessor operator. Each diagnostic names the construct it is about.
+def warn_c23_pp_embed : Warning<
+ "#embed is a C23 extension">,
+ InGroup<CPre23Compat>,
+ DefaultIgnore;
+def warn_c23_pp_has_embed : Warning<
+ "'__has_embed' is a C23 extension">,
+ InGroup<CPre23Compat>,
+ DefaultIgnore;
+def warn_cxx26_pp_embed : Warning<
+ "#embed is a C++26 extension">,
+ InGroup<CXXPre26Compat>,
+ DefaultIgnore;
+def warn_cxx26_pp_has_embed : Warning<
+ "'__has_embed' is a C++26 extension">,
+ InGroup<CXXPre26Compat>,
+ DefaultIgnore;
def ext_pp_extra_tokens_at_eol : ExtWarn<
"extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
@@ -483,7 +499,13 @@ def ext_pp_gnu_line_directive : Extension<
def err_pp_invalid_directive : Error<
"invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
def warn_pp_invalid_directive : Warning<
- err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
+ err_pp_invalid_directive.Summary>,
+ InGroup<UnsupportedDirective>;
+// %0 selects whether the diagnostic is about an embed parameter/directive.
+// The first %select alternative is empty so that option 0 does not produce a
+// doubled space before the following word.
+def warn_pp_unknown_parameter_ignored : Warning<
+ "unknown%select{| embed}0 preprocessor parameter '%1' ignored">,
+ InGroup<UnknownDirectiveParameters>;
+def err_pp_unsupported_directive : Error<
+ "unsupported%select{| embed}0 directive: %1">;
def err_pp_directive_required : Error<
"%0 must be used within a preprocessing directive">;
def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal;
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 56cb093dd8c376f..c757f8775b425e9 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -276,11 +276,13 @@ class FileManager : public RefCountedBase<FileManager> {
/// MemoryBuffer if successful, otherwise returning null.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBufferForFile(FileEntryRef Entry, bool isVolatile = false,
- bool RequiresNullTerminator = true);
+ bool RequiresNullTerminator = true,
+ std::optional<int64_t> MaybeLimit = std::nullopt);
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBufferForFile(StringRef Filename, bool isVolatile = false,
- bool RequiresNullTerminator = true) {
- return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
+ bool RequiresNullTerminator = true,
+ std::optional<int64_t> MaybeLimit = std::nullopt) {
+ return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
RequiresNullTerminator);
}
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 94db56a9fd5d78c..19a66fbb0731194 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -126,6 +126,9 @@ PPKEYWORD(error)
// C99 6.10.6 - Pragma Directive.
PPKEYWORD(pragma)
+// C23 & C++26 #embed
+PPKEYWORD(embed)
+
// GNU Extensions.
PPKEYWORD(import)
PPKEYWORD(include_next)
@@ -151,6 +154,10 @@ TOK(eod) // End of preprocessing directive (end of line inside a
// directive).
TOK(code_completion) // Code completion marker
+// #embed speed support
+TOK(builtin_embed)
+
+
// C99 6.4.9: Comments.
TOK(comment) // Comment (only in -E -C[C] mode)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3f2058a5d4650ca..a77a1a5e9aad981 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -114,6 +114,11 @@ def IncludePath_Group : OptionGroup<"<I/i group>">, Group<Preprocessor_Group>,
DocBrief<[{
Flags controlling how ``#include``\s are resolved to files.}]>;
+def EmbedPath_Group : OptionGroup<"<Embed group>">, Group<Preprocessor_Group>,
+ DocName<"Embed path management">,
+ DocBrief<[{
+Flags controlling how ``#embed``\s and similar are resolved to files.}]>;
+
def I_Group : OptionGroup<"<I group>">, Group<IncludePath_Group>, DocFlatten;
def i_Group : OptionGroup<"<i group>">, Group<IncludePath_Group>, DocFlatten;
def clang_i_Group : OptionGroup<"<clang i group>">, Group<i_Group>, DocFlatten;
@@ -816,6 +821,14 @@ will be ignored}]>;
def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
Visibility<[ClangOption, FlangOption]>,
MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
+def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
+ Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+ Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+ MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
+def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
+ Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+ Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+ MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
def MD : Flag<["-"], "MD">, Group<M_Group>,
HelpText<"Write a depfile containing user and system headers">;
def MMD : Flag<["-"], "MMD">, Group<M_Group>,
@@ -1353,6 +1366,9 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Print include directives in -E mode in addition to normal output">,
MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
+def dE : Flag<["-"], "dE">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Print embed directives in -E mode in addition to normal output">,
+ MarshallingInfoFlag<PreprocessorOutputOpts<"ShowEmbedDirectives">>;
def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Print macro definitions in -E mode instead of normal output">;
def dead__strip : Flag<["-"], "dead_strip">;
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index db2ec9f2ae20698..3e36db3f8ce46ea 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,6 +22,7 @@ class PreprocessorOutputOptions {
unsigned ShowMacroComments : 1; ///< Show comments, even in macros.
unsigned ShowMacros : 1; ///< Print macro definitions.
unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output.
+ unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output.
unsigned RewriteIncludes : 1; ///< Preprocess include directives only.
unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules.
unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
@@ -37,6 +38,7 @@ class PreprocessorOutputOptions {
ShowMacroComments = 0;
ShowMacros = 0;
ShowIncludeDirectives = 0;
+ ShowEmbedDirectives = 0;
RewriteIncludes = 0;
RewriteImports = 0;
MinimizeWhitespace = 0;
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index 94f96cf9c512541..921bf159ead570d 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -83,6 +83,47 @@ class PPCallbacks {
const Token &FilenameTok,
SrcMgr::CharacteristicKind FileType) {}
+ /// Callback invoked whenever the preprocessor cannot find a file for an
+ /// embed directive.
+ ///
+ /// \param FileName The name of the file being embedded, as written in the
+ /// source code.
+ ///
+ /// \returns true to indicate that the preprocessor should skip this file
+ /// and not issue any diagnostic. The default implementation returns false.
+ virtual bool EmbedFileNotFound(StringRef FileName) { return false; }
+
+ /// Callback invoked whenever an embed directive has been processed,
+ /// regardless of whether the embed will actually find a file. The default
+ /// implementation does nothing.
+ ///
+ /// \param HashLoc The location of the '#' that starts the embed directive.
+ ///
+ /// \param FileName The name of the file being embedded, as written in the
+ /// source code.
+ ///
+ /// \param IsAngled Whether the file name was enclosed in angle brackets;
+ /// otherwise, it was enclosed in quotes.
+ ///
+ /// \param FilenameRange The character range of the quotes or angle brackets
+ /// for the written file name.
+ ///
+ /// \param ParametersRange The character range of the embed parameters. An
+ /// empty range if there were no parameters.
+ ///
+ /// \param File The actual file that may be embedded by this embed directive.
+ ///
+ /// \param SearchPath Contains the search path which was used to find the file
+ /// in the file system. If the file was found via an absolute path,
+ /// SearchPath will be empty.
+ ///
+ /// \param RelativePath The path relative to SearchPath, at which the resource
+ /// file was found. This is equal to FileName.
+ virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+ bool IsAngled, CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) {}
+
/// Callback invoked whenever the preprocessor cannot find a file for an
/// inclusion directive.
///
@@ -330,11 +371,15 @@ class PPCallbacks {
SourceRange Range) {
}
+ /// Hook called when a '__has_embed' directive is read.
+ virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File) {}
+
/// Hook called when a '__has_include' or '__has_include_next' directive is
/// read.
virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
OptionalFileEntryRef File,
- SrcMgr::CharacteristicKind FileType);
+ SrcMgr::CharacteristicKind FileType) {}
/// Hook called when a source range is skipped.
/// \param Range The SourceRange that was skipped. The range begins at the
@@ -461,6 +506,25 @@ class PPChainedCallbacks : public PPCallbacks {
Second->FileSkipped(SkippedFile, FilenameTok, FileType);
}
+ /// Forwards the embed "file not found" notification to both chained
+ /// callbacks and returns true if either of them asked to skip the file.
+ bool EmbedFileNotFound(StringRef FileName) override {
+ // Dispatch to the embed-specific hook (not FileNotFound, which is the
+ // separate hook for inclusion directives).
+ bool Skip = First->EmbedFileNotFound(FileName);
+ // Make sure to invoke the second callback, no matter if the first already
+ // returned true to skip the file.
+ Skip |= Second->EmbedFileNotFound(FileName);
+ return Skip;
+ }
+
+ /// Forwards the embed directive notification, with unchanged arguments, to
+ /// First and then to Second.
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) override {
+ First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+ ParametersRange, File, SearchPath, RelativePath);
+ Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+ ParametersRange, File, SearchPath, RelativePath);
+ }
+
bool FileNotFound(StringRef FileName) override {
bool Skip = First->FileNotFound(FileName);
// Make sure to invoke the second callback, no matter if the first already
@@ -561,9 +625,18 @@ class PPChainedCallbacks : public PPCallbacks {
Second->PragmaDiagnostic(Loc, Namespace, mapping, Str);
}
+ /// Forwards the '__has_embed' notification, with unchanged arguments, to
+ /// First and then to Second.
+ void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File) override {
+ First->HasEmbed(Loc, FileName, IsAngled, File);
+ Second->HasEmbed(Loc, FileName, IsAngled, File);
+ }
+
void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
OptionalFileEntryRef File,
- SrcMgr::CharacteristicKind FileType) override;
+ SrcMgr::CharacteristicKind FileType) override {
+ First->HasInclude(Loc, FileName, IsAngled, File, FileType);
+ Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
+ }
void PragmaOpenCLExtension(SourceLocation NameLoc, const IdentifierInfo *Name,
SourceLocation StateLoc, unsigned State) override {
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 18d88407ae12c90..7470bf5882730cb 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -31,6 +31,7 @@
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -53,6 +54,7 @@
#include <optional>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -165,6 +167,7 @@ class Preprocessor {
IdentifierInfo *Ident__has_builtin; // __has_builtin
IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
IdentifierInfo *Ident__has_attribute; // __has_attribute
+ IdentifierInfo *Ident__has_embed; // __has_embed
IdentifierInfo *Ident__has_include; // __has_include
IdentifierInfo *Ident__has_include_next; // __has_include_next
IdentifierInfo *Ident__has_warning; // __has_warning
@@ -206,7 +209,10 @@ class Preprocessor {
enum {
/// Maximum depth of \#includes.
- MaxAllowedIncludeStackDepth = 200
+ MaxAllowedIncludeStackDepth = 200,
+ VALUE__STDC_EMBED_NOT_FOUND__ = 0,
+ VALUE__STDC_EMBED_FOUND__ = 1,
+ VALUE__STDC_EMBED_EMPTY__ = 2,
};
// State that is set before the preprocessor begins.
@@ -1728,6 +1734,22 @@ class Preprocessor {
/// Lex a token, forming a header-name token if possible.
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
+ /// Aggregate returned by LexEmbedParameters.
+ ///
+ /// NOTE(review): the per-field notes below are inferred from the field names
+ /// only; confirm them against the LexEmbedParameters definition.
+ struct LexEmbedParametersResult {
+ // Presumably false when lexing the parameter list failed -- TODO confirm.
+ bool Successful;
+ // Presumably the values of the `limit` and `offset` parameters, unset when
+ // the parameter was not written -- TODO confirm.
+ std::optional<size_t> MaybeLimitParam;
+ std::optional<size_t> MaybeOffsetParam;
+ // Presumably the token sequences given to `if_empty`, `prefix` and `suffix`,
+ // unset when the parameter was not written -- TODO confirm.
+ std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
+ std::optional<SmallVector<Token, 2>> MaybePrefixParam;
+ std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
+ // Presumably a count of parameters that were not recognized -- TODO confirm.
+ int UnrecognizedParams;
+ // Presumably the source extent of the lexed parameter list -- TODO confirm.
+ SourceLocation StartLoc;
+ SourceLocation EndLoc;
+ };
+
+ LexEmbedParametersResult LexEmbedParameters(Token &Current,
+ bool InHasEmbed = false,
+ bool DiagnoseUnknown = true);
+
bool LexAfterModuleImport(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
@@ -2413,6 +2435,17 @@ class Preprocessor {
bool *IsFrameworkFound, bool SkipCache = false,
bool OpenFile = true, bool CacheFailures = true);
+ /// Given a "foo" or \<foo> reference, look up the indicated embed resource.
+ ///
+ /// Returns std::nullopt on failure. \p isAngled indicates whether the file
+ /// reference was written with <> instead of "".
+ OptionalFileEntryRef
+ LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+ bool OpenFile,
+ const FileEntry *LookupFromFile = nullptr,
+ SmallVectorImpl<char> *SearchPath = nullptr,
+ SmallVectorImpl<char> *RelativePath = nullptr);
+
/// Return true if we're in the top-level file, not in a \#include.
bool isInPrimaryFile() const;
@@ -2517,6 +2550,9 @@ class Preprocessor {
/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
+ /// The integral value of the expression.
+ std::optional<llvm::APSInt> Value;
+
/// Whether the expression was evaluated as true or not.
bool Conditional;
@@ -2531,7 +2567,24 @@ class Preprocessor {
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
- DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+ DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ bool CheckForEoD = true,
+ bool Parenthesized = false);
+
+ /// Evaluate an integer constant expression that may occur after a
+ /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
+ ///
+ /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
+ ///
+ /// This overload additionally takes a caller-provided token, \p Tok.
+ /// NOTE(review): whether \p Tok is an input, an output, or both is not
+ /// visible from this declaration -- confirm against the definition.
+ DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ Token &Tok,
+ bool CheckForEoD = true,
+ bool Parenthesized = false);
+
+ /// Process a '__has_embed("path" [, ...])' expression.
+ ///
+ /// Returns predefined `__STDC_EMBED_*` macro values if
+ /// successful.
+ int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
/// Process a '__has_include("path")' expression.
///
@@ -2679,6 +2732,15 @@ class Preprocessor {
const FileEntry *LookupFromFile, StringRef &LookupFilename,
SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
+ // Binary data inclusion
+ void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+ const FileEntry *LookupFromFile = nullptr);
+ void HandleEmbedDirectiveNaive(
+ SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+ StringRef BinaryContents, const size_t TargetCharWidth);
+ void HandleEmbedDirectiveBuiltin(
+ SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+ StringRef BinaryContents, const size_t TargetCharWidth);
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 058194bcde72e51..23f3458d79e0312 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -167,6 +167,13 @@ class PreprocessorOptions {
/// of the specified memory buffer (the second part of each pair).
std::vector<std::pair<std::string, llvm::MemoryBuffer *>> RemappedFileBuffers;
+ /// User specified embed entries.
+ std::vector<std::string> EmbedEntries;
+
+ /// Whether or not naive expansion should be used all the time for
+ /// builtin embed
+ bool NoBuiltinPPEmbed = false;
+
/// Whether the compiler instance should retain (i.e., not free)
/// the buffers associated with remapped files.
///
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index d16626b10652136..e0e80b5e0fbedbe 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -537,13 +537,19 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
- bool RequiresNullTerminator) {
+ bool RequiresNullTerminator,
+ std::optional<int64_t> MaybeLimit) {
const FileEntry *Entry = &FE.getFileEntry();
// If the content is living on the file entry, return a reference to it.
if (Entry->Content)
return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef());
uint64_t FileSize = Entry->getSize();
+
+ if (MaybeLimit)
+ FileSize = *MaybeLimit;
+
+
// If there's a high enough chance that the file have changed since we
// got its size, force a stat before opening it.
if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index e5599d545541085..d2b5426d27bb3b2 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -423,7 +423,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
// case values). Note that this depends on 'if' being null terminated.
#define HASH(LEN, FIRST, THIRD) \
- (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
+ (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
#define CASE(LEN, FIRST, THIRD, NAME) \
case HASH(LEN, FIRST, THIRD): \
return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,6 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
CASE( 4, 'e', 's', else);
CASE( 4, 'l', 'n', line);
CASE( 4, 's', 'c', sccs);
+ CASE( 5, 'e', 'b', embed);
CASE( 5, 'e', 'd', endif);
CASE( 5, 'e', 'r', error);
CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b91126ebed0186c..fc2f749a34fc471 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
Args.addAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
- options::OPT_F, options::OPT_index_header_map});
+ options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
@@ -8182,6 +8182,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
// Pass along any -I options so we get proper .include search paths.
Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
+ // Pass along any -embed-dir or similar options so we get proper embed paths.
+ Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group);
+
// Determine the original source input.
auto FindSource = [](const Action *S) -> const Action * {
while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 606e9e790ad833b..232626e783e1b7d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -1008,6 +1008,7 @@ struct AdditionalKeywords {
kw_synchronized = &IdentTable.get("synchronized");
kw_throws = &IdentTable.get("throws");
kw___except = &IdentTable.get("__except");
+ kw___has_embed = &IdentTable.get("__has_embed");
kw___has_include = &IdentTable.get("__has_include");
kw___has_include_next = &IdentTable.get("__has_include_next");
@@ -1305,6 +1306,7 @@ struct AdditionalKeywords {
IdentifierInfo *kw_NS_ERROR_ENUM;
IdentifierInfo *kw_NS_OPTIONS;
IdentifierInfo *kw___except;
+ IdentifierInfo *kw___has_embed;
IdentifierInfo *kw___has_include;
IdentifierInfo *kw___has_include_next;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 543c119620bf28f..e405a9085951dc0 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1400,6 +1400,9 @@ class AnnotatingParser {
Keywords.kw___has_include_next)) {
parseHasInclude();
}
+ else if (Tok->is(Keywords.kw___has_embed)) {
+ parseHasEmbed();
+ }
if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
Tok->Next->isNot(tok::l_paren)) {
Tok->setType(TT_CSharpGenericTypeConstraint);
@@ -1464,6 +1467,21 @@ class AnnotatingParser {
}
}
+  /// Annotates the tokens that follow a '<' as an angle-bracketed embed path.
+  /// Does nothing unless the current token is '<'.
+  void parseEmbedDirective() {
+    if (!CurrentToken || CurrentToken->isNot(tok::less))
+      return;
+    // Step past '<', then walk every remaining token.
+    for (next(); CurrentToken; next()) {
+      // Everything up to a trailing line comment is treated as an implicit
+      // string literal.
+      const bool IsLineComment = CurrentToken->is(tok::comment) ||
+                                 CurrentToken->TokenText.startswith("//");
+      if (!IsLineComment)
+        CurrentToken->setType(TT_ImplicitStringLiteral);
+    }
+  }
+
void parseWarningOrError() {
next();
// We still want to format the whitespace left of the first token of the
@@ -1500,6 +1518,14 @@ class AnnotatingParser {
next(); // ')'
}
+  /// Handles the parenthesized operand of '__has_embed'. A no-op when the
+  /// current token is not '('.
+  void parseHasEmbed() {
+    if (CurrentToken && CurrentToken->is(tok::l_paren)) {
+      next(); // '('
+      parseEmbedDirective();
+      next(); // ')'
+    }
+  }
+
LineType parsePreprocessorDirective() {
bool IsFirstToken = CurrentToken->IsFirst;
LineType Type = LT_PreprocessorDirective;
@@ -1563,6 +1589,8 @@ class AnnotatingParser {
} else if (Tok->isOneOf(Keywords.kw___has_include,
Keywords.kw___has_include_next)) {
parseHasInclude();
+ } else if (Tok->is(Keywords.kw___has_embed)) {
+ parseHasEmbed();
}
}
return Type;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bb442495f58359c..05406b5d42d7380 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4302,6 +4302,12 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
if (Opts.SourceDateEpoch)
GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch));
+ for (const auto &EmbedEntry : Opts.EmbedEntries)
+ GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
+
+ if (Opts.NoBuiltinPPEmbed)
+ GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+
// Don't handle LexEditorPlaceholders. It is implied by the action that is
// generated elsewhere.
}
@@ -4394,6 +4400,19 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
}
}
+ for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) {
+ StringRef Val = A->getValue();
+ Opts.EmbedEntries.push_back(std::string(Val));
+ }
+
+ // Can disable the internal embed builtin / token. Only the joined
+ // `-fno-builtin-<name>` spelling carries a value; plain `-fno-builtin` is a
+ // value-less flag, so it must not be filtered here and queried with
+ // getValue().
+ for (const auto *A : Args.filtered(OPT_fno_builtin_)) {
+ StringRef Val = A->getValue();
+ if (Val == "pp_embed") {
+ Opts.NoBuiltinPPEmbed = true;
+ }
+ }
+
// Always avoid lexing editor placeholders when we're just running the
// preprocessor as we never want to emit the
// "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index c2f6f41ae291efb..10558b1d34bf623 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,6 +65,21 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
/*IsMissing=*/false);
}
+  /// Records an embed directive whose file was not resolved as a missing
+  /// dependency, using the file name as spelled in the directive.
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
+    // Resolved files are not recorded here.
+    // NOTE(review): the original comment says existing files are handled by
+    // FileChanged -- confirm that this holds for embedded resources.
+    if (File)
+      return;
+    DepCollector.maybeAddDependency(FileName,
+                                    /*FromModule*/ false,
+                                    /*IsSystem*/ false,
+                                    /*IsModuleFile*/ false,
+                                    &PP.getFileManager(),
+                                    /*IsMissing*/ true);
+  }
+
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -81,6 +96,20 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
// Files that actually exist are handled by FileChanged.
}
+  /// Record the resource probed by a `__has_embed` expression as a
+  /// dependency. A probe that did not resolve to a file records nothing.
+  ///
+  /// \param File  the resolved resource, or empty if the probe failed.
+  /// The remaining parameters are unused.
+  void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
+                OptionalFileEntryRef File) override {
+    if (File) {
+      // Drop any leading "./" so the name matches other recorded entries.
+      DepCollector.maybeAddDependency(
+          llvm::sys::path::remove_leading_dotslash(File->getName()),
+          /*FromModule=*/false,
+          /*IsSystem=*/false,
+          /*IsModuleFile=*/false, &PP.getFileManager(),
+          /*IsMissing=*/false);
+    }
+  }
+
void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
OptionalFileEntryRef File,
SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 6aad04370f6e7ad..683f751a94244ec 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -26,6 +26,14 @@ namespace DOT = llvm::DOT;
namespace {
class DependencyGraphCallback : public PPCallbacks {
+public:
+ enum DirectiveBehavior {
+ Normal = 0,
+ IgnoreEmbed = 0b01,
+ IgnoreInclude = 0b10,
+ };
+
+private:
const Preprocessor *PP;
std::string OutputFile;
std::string SysRoot;
@@ -34,6 +42,7 @@ class DependencyGraphCallback : public PPCallbacks {
llvm::DenseMap<FileEntryRef, SmallVector<FileEntryRef, 2>>;
DependencyMap Dependencies;
+ DirectiveBehavior Behavior;
private:
raw_ostream &writeNodeReference(raw_ostream &OS,
@@ -42,7 +51,8 @@ class DependencyGraphCallback : public PPCallbacks {
public:
DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
- StringRef SysRoot)
+ StringRef SysRoot,
+ DirectiveBehavior Action = IgnoreEmbed)
: PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
@@ -52,6 +62,12 @@ class DependencyGraphCallback : public PPCallbacks {
StringRef RelativePath, const Module *Imported,
SrcMgr::CharacteristicKind FileType) override;
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) override;
+
void EndOfMainFile() override {
OutputGraphFile();
}
@@ -70,6 +86,31 @@ void DependencyGraphCallback::InclusionDirective(
bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
StringRef SearchPath, StringRef RelativePath, const Module *Imported,
SrcMgr::CharacteristicKind FileType) {
+ if ((Behavior & IgnoreInclude) == IgnoreInclude) {
+ return;
+ }
+ if (!File)
+ return;
+
+ SourceManager &SM = PP->getSourceManager();
+ OptionalFileEntryRef FromFile =
+ SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc)));
+ if (!FromFile)
+ return;
+
+ Dependencies[*FromFile].push_back(*File);
+
+ AllFiles.insert(*File);
+ AllFiles.insert(*FromFile);
+}
+
+void DependencyGraphCallback::EmbedDirective(
+ SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+ if ((Behavior & IgnoreEmbed) == IgnoreEmbed) {
+ return;
+ }
if (!File)
return;
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 846e5fce6de7b2c..b7d084773b0a195 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__STDC_UTF_16__", "1");
Builder.defineMacro("__STDC_UTF_32__", "1");
+ // __has_embed definitions
+ Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
+ Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
+ Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+
if (LangOpts.ObjC)
Builder.defineMacro("__OBJC__");
@@ -729,6 +734,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
if (LangOpts.Char8)
Builder.defineMacro("__cpp_char8_t", "202207L");
Builder.defineMacro("__cpp_impl_destroying_delete", "201806L");
+
+ Builder.defineMacro("__cpp_pp_embed", "202403L");
}
/// InitializeOpenCLFeatureTestMacros - Define OpenCL macros based on target
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 7f5f6690682300e..fb9baa92e6836d3 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
bool DisableLineMarkers;
bool DumpDefines;
bool DumpIncludeDirectives;
+ bool DumpEmbedDirectives;
bool UseLineDirectives;
bool IsFirstFileEntered;
bool MinimizeWhitespace;
@@ -106,12 +107,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
- bool defines, bool DumpIncludeDirectives,
+ bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
bool UseLineDirectives, bool MinimizeWhitespace,
bool DirectivesOnly, bool KeepSystemIncludes)
: PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
DisableLineMarkers(lineMarkers), DumpDefines(defines),
DumpIncludeDirectives(DumpIncludeDirectives),
+ DumpEmbedDirectives(DumpEmbedDirectives),
UseLineDirectives(UseLineDirectives),
MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
@@ -149,6 +151,11 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override;
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) override;
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -398,6 +405,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
}
}
+/// Echo a `#embed` directive into the preprocessed output when embed
+/// directive dumping (DumpEmbedDirectives) is enabled.
+///
+/// Only the resource name and its delimiters are reproduced; the directive's
+/// parameters are not printed.
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  // Nothing to print unless dumping of embed directives was requested.
+  if (!DumpEmbedDirectives)
+    return;
+
+  // Reproduce the delimiters the directive was spelled with.
+  const char OpenDelim = IsAngled ? '<' : '"';
+  const char CloseDelim = IsAngled ? '>' : '"';
+
+  MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+  *OS << "#embed " << OpenDelim << FileName << CloseDelim
+      << " /* clang -E -dE */";
+  setEmittedDirectiveOnThisLine();
+}
+
void PrintPPOutputPPCallbacks::InclusionDirective(
SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -981,7 +1002,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
- Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
+ Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
// Expand macros in pragmas with -fms-extensions. The assumption is that
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 28f7b0b9edfc5c2..4a73946951fd9c2 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -71,6 +71,11 @@ class InclusionRewriter : public PPCallbacks {
FileID PrevFID) override;
void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
SrcMgr::CharacteristicKind FileType) override;
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) override;
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -177,6 +182,14 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
LastInclusionLocation = SourceLocation();
}
+/// Called whenever the preprocessor encounters an embed directive.
+/// Intentionally a no-op: every argument is ignored and nothing is recorded
+/// or rewritten for the directive here.
+void InclusionRewriter::EmbedDirective(
+ SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/,
+ CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/,
+ OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/,
+ StringRef /*RelativePath*/) {}
+
/// This should be called whenever the preprocessor encounters include
/// directives. It does not say whether the file has been included, but it
/// provides more information about the directive (hash location instead
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
index f2b60a728e90178..ea5dce2c27a587c 100644
--- a/clang/lib/Lex/PPCallbacks.cpp
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -14,16 +14,5 @@ using namespace clang;
// Out of line key method.
PPCallbacks::~PPCallbacks() = default;
-void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
- bool IsAngled, OptionalFileEntryRef File,
- SrcMgr::CharacteristicKind FileType) {}
-
// Out of line key method.
PPChainedCallbacks::~PPChainedCallbacks() = default;
-
-void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
- bool IsAngled, OptionalFileEntryRef File,
- SrcMgr::CharacteristicKind FileType) {
- First->HasInclude(Loc, FileName, IsAngled, File, FileType);
- Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
-}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e3065c17dc70b43..e0d98d7ca03fa11 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -18,7 +18,9 @@
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
@@ -1079,6 +1081,101 @@ OptionalFileEntryRef Preprocessor::LookupFile(
return std::nullopt;
}
+/// Locate the resource named by a `#embed` directive or `__has_embed` probe.
+///
+/// Lookup order:
+///   1. An absolute \p Filename is looked up as-is and nothing else is tried.
+///   2. Quoted names only: the directory given in \p SearchPath when one is
+///      supplied and non-empty, otherwise the directory of
+///      \p LookupFromFile (taken from its real path name, when available).
+///   3. Quoted names only: the directory the file manager resolves "." to.
+///   4. Each directory in PreprocessorOptions::EmbedEntries, in order.
+///
+/// \param FilenameLoc     location of the name token (currently unused).
+/// \param Filename        resource name without its delimiters.
+/// \param isAngled        true for `<name>`, false for `"name"`.
+/// \param OpenFile        forwarded to FileManager::getFileRef.
+/// \param LookupFromFile  file containing the directive, may be null.
+/// \param SearchPath      optional directory to use for the local lookup.
+/// \param RelativePath    currently unused.
+/// \returns the first entry found, or std::nullopt if every lookup failed.
+OptionalFileEntryRef Preprocessor::LookupEmbedFile(
+    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+    bool OpenFile, const FileEntry *LookupFromFile,
+    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath) {
+  FileManager &FM = this->getFileManager();
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // Look the path up directly or fail immediately.
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(Filename, true, OpenFile);
+    return llvm::expectedToOptional(std::move(ShouldBeEntry));
+  }
+
+  SmallString<512> LookupPath;
+
+  // Completes the directory currently held in LookupPath with exactly one
+  // separator (if needed) plus Filename, then queries the file manager.
+  // A failed lookup has its error consumed and yields std::nullopt.
+  auto LookupInCurrentDir =
+      [&](bool NormalizeFullPath) -> OptionalFileEntryRef {
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back()))
+      LookupPath.append(llvm::sys::path::get_separator());
+    LookupPath.append(Filename);
+    if (NormalizeFullPath)
+      llvm::sys::path::native(LookupPath);
+    return llvm::expectedToOptional(FM.getFileRef(LookupPath, true, OpenFile));
+  };
+
+  if (!isAngled) {
+    // Local lookup. An explicit search directory wins, but only when it
+    // actually names one: callers that pass an empty buffer must not
+    // suppress the lookup relative to the file containing the directive.
+    bool HaveLocalDir = false;
+    if (SearchPath && !SearchPath->empty()) {
+      llvm::sys::path::native(*SearchPath, LookupPath);
+      HaveLocalDir = true;
+    } else if (LookupFromFile) {
+      StringRef IncluderPath = LookupFromFile->tryGetRealPathName();
+      if (!IncluderPath.empty()) {
+        llvm::sys::path::native(IncluderPath, LookupPath);
+        llvm::sys::path::remove_filename(LookupPath);
+        HaveLocalDir = true;
+      }
+    }
+    if (HaveLocalDir) {
+      if (OptionalFileEntryRef Found =
+              LookupInCurrentDir(/*NormalizeFullPath=*/false))
+        return Found;
+    }
+
+    // Working directory lookup. expectedToOptional also consumes the error
+    // when "." cannot be resolved, so no unchecked llvm::Expected is dropped.
+    LookupPath.clear();
+    if (auto WorkingDirEntry =
+            llvm::expectedToOptional(FM.getDirectoryRef("."))) {
+      StringRef WorkingDir = WorkingDirEntry->getName();
+      if (!WorkingDir.empty()) {
+        llvm::sys::path::native(WorkingDir, LookupPath);
+        if (OptionalFileEntryRef Found =
+                LookupInCurrentDir(/*NormalizeFullPath=*/false))
+          return Found;
+      }
+    }
+  }
+
+  // Finally walk the user-supplied embed directories in order.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    llvm::sys::path::native(Entry, LookupPath);
+    if (OptionalFileEntryRef Found =
+            LookupInCurrentDir(/*NormalizeFullPath=*/true))
+      return Found;
+  }
+  return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// Preprocessor Directive Handling.
//===----------------------------------------------------------------------===//
@@ -1174,6 +1271,7 @@ void Preprocessor::HandleDirective(Token &Result) {
case tok::pp_include_next:
case tok::pp___include_macros:
case tok::pp_pragma:
+ case tok::pp_embed:
Diag(Result, diag::err_embedded_directive) << II->getName();
Diag(*ArgMacro, diag::note_macro_expansion_here)
<< ArgMacro->getIdentifierInfo();
@@ -1288,6 +1386,11 @@ void Preprocessor::HandleDirective(Token &Result) {
return HandleIdentSCCSDirective(Result);
case tok::pp_sccs:
return HandleIdentSCCSDirective(Result);
+ case tok::pp_embed:
+ return HandleEmbedDirective(SavedHash.getLocation(), Result,
+ getCurrentFileLexer()
+ ? getCurrentFileLexer()->getFileEntry()
+ : nullptr);
case tok::pp_assert:
//isExtension = true; // FIXME: implement #assert
break;
@@ -3517,3 +3620,400 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
/*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
}
+
+/// Kinds of opening bracket tracked while scanning a balanced token sequence.
+enum class BracketType { Brace, Paren, Square };
+
+/// Lex the optional parameter sequence of a `#embed` directive or a
+/// `__has_embed` expression:
+///
+///   identifier ['::' identifier]... ['(' balanced-token-sequence ')']
+///
+/// repeated until the terminating token, which is tok::r_paren when
+/// \p InHasEmbed is set and tok::eod otherwise.
+///
+/// `limit` and `clang::offset` arguments are evaluated with the `#if`
+/// expression evaluator. `if_empty`, `prefix` and `suffix` keep their raw
+/// argument tokens. Any other parameter is counted in UnrecognizedParams
+/// and, when \p DiagnoseUnknown is set, diagnosed as ignored.
+///
+/// \param CurTok  token used for lexing; on return it holds the last token
+///                lexed by this function.
+/// \returns the collected parameters. `Successful` is set only when the whole
+///          sequence was lexed; StartLoc/EndLoc are the locations of the
+///          first token lexed here and of the terminating token.
+Preprocessor::LexEmbedParametersResult
+Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
+                                 bool DiagnoseUnknown) {
+  LexEmbedParametersResult Result{};
+  SmallString<32> Parameter;
+  SmallVector<Token, 2> ParameterTokens;
+  tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
+
+  // Skip the remainder of the directive after an error, but never when the
+  // end-of-directive token has already been consumed: discarding again from
+  // there would swallow tokens that belong to the following lines.
+  auto DiscardRestOfDirective = [&]() {
+    if (CurTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+  };
+
+  // Lex the first token before recording where the parameters start; the
+  // incoming CurTok is not guaranteed to hold a meaningful location.
+  LexNonComment(CurTok);
+  Result.StartLoc = CurTok.getLocation();
+
+  while (CurTok.isNot(EndTokenKind)) {
+    Parameter.clear();
+    // Each parameter owns its argument tokens; without this reset the tokens
+    // of an unrecognized parameter would be prepended to the next one.
+    ParameterTokens.clear();
+
+    // Lex identifier [:: identifier ...]
+    if (!CurTok.is(tok::identifier)) {
+      Diag(CurTok, diag::err_expected) << "identifier";
+      DiscardRestOfDirective();
+      return Result;
+    }
+    Token ParameterStartTok = CurTok;
+    IdentifierInfo *InitialID = CurTok.getIdentifierInfo();
+    Parameter.append(InitialID->getName());
+    for (LexNonComment(CurTok); CurTok.is(tok::coloncolon);
+         LexNonComment(CurTok)) {
+      Parameter.append("::");
+      LexNonComment(CurTok);
+      if (!CurTok.is(tok::identifier)) {
+        Diag(CurTok, diag::err_expected) << "identifier";
+        DiscardRestOfDirective();
+        return Result;
+      }
+      IdentifierInfo *NextID = CurTok.getIdentifierInfo();
+      Parameter.append(NextID->getName());
+    }
+
+    // Lex the argument list; how it is handled depends on the parameter.
+    if (Parameter == "limit") {
+      // The limit argument is processed using the evaluation rules of #if.
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardRestOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult LimitEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!LimitEvalResult.Value) {
+        return Result;
+      }
+      const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
+      // NOTE(review): every unsigned value is classified as "does not fit",
+      // which warns even for small unsigned limits — confirm this is
+      // intended. The classification is kept exactly as it was.
+      const bool ValueDoesNotFit =
+          LimitResult.getBitWidth() > 64 || LimitResult.isUnsigned() ||
+          (LimitResult.isSigned() && LimitResult.isNegative());
+      if (ValueDoesNotFit) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // Truncate to the low 64 bits and carry on.
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getRawData()[0]);
+      } else {
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else if (Parameter == "clang::offset") {
+      // The offset argument is processed using the evaluation rules of #if.
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardRestOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult OffsetEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!OffsetEvalResult.Value) {
+        return Result;
+      }
+      const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+      if (OffsetResult.getBitWidth() > 64) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // Truncate to the low 64 bits and carry on.
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getRawData()[0]);
+      } else {
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else {
+      if (CurTok.is(tok::l_paren)) {
+        // Collect the balanced token sequence between the outer parentheses
+        // into ParameterTokens. The bracket stack starts with the outer '('.
+        SmallVector<BracketType, 4> Brackets;
+        Brackets.push_back(BracketType::Paren);
+        auto ParseArgToken = [&]() {
+          for (LexNonComment(CurTok); CurTok.isNot(tok::eod);
+               LexNonComment(CurTok)) {
+            switch (CurTok.getKind()) {
+            default:
+              break;
+            case tok::l_paren:
+              Brackets.push_back(BracketType::Paren);
+              break;
+            case tok::r_paren:
+              if (Brackets.back() != BracketType::Paren) {
+                Diag(CurTok, diag::err_pp_expected_rparen);
+                DiscardRestOfDirective();
+                return false;
+              }
+              Brackets.pop_back();
+              if (Brackets.empty()) {
+                // Outer ')' reached; it is not part of the argument.
+                return true;
+              }
+              break;
+            case tok::l_brace:
+              Brackets.push_back(BracketType::Brace);
+              break;
+            case tok::r_brace:
+              if (Brackets.back() != BracketType::Brace) {
+                Diag(CurTok, diag::err_expected) << "}";
+                DiscardRestOfDirective();
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            case tok::l_square:
+              Brackets.push_back(BracketType::Square);
+              break;
+            case tok::r_square:
+              if (Brackets.back() != BracketType::Square) {
+                Diag(CurTok, diag::err_expected) << "]";
+                DiscardRestOfDirective();
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            }
+            ParameterTokens.push_back(CurTok);
+          }
+          // The directive ended while brackets were still open. CurTok is
+          // tok::eod here, so there is nothing left to discard.
+          if (!Brackets.empty()) {
+            Diag(CurTok, diag::err_pp_expected_rparen);
+            return false;
+          }
+          return true;
+        };
+        if (!ParseArgToken()) {
+          return Result;
+        }
+        if (!CurTok.is(tok::r_paren)) {
+          Diag(CurTok, diag::err_pp_expected_rparen);
+          DiscardRestOfDirective();
+          return Result;
+        }
+        // Move past the closing ')', skipping comments like every other
+        // token fetch in this function.
+        LexNonComment(CurTok);
+      }
+      // "Token-soup" parameters
+      if (Parameter == "if_empty") {
+        // TODO: integer list optimization
+        Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+      } else if (Parameter == "prefix") {
+        // TODO: integer list optimization
+        Result.MaybePrefixParam = std::move(ParameterTokens);
+      } else if (Parameter == "suffix") {
+        // TODO: integer list optimization
+        Result.MaybeSuffixParam = std::move(ParameterTokens);
+      } else {
+        ++Result.UnrecognizedParams;
+        if (DiagnoseUnknown) {
+          Diag(ParameterStartTok, diag::warn_pp_unknown_parameter_ignored)
+              << 1 << Parameter;
+        }
+      }
+    }
+  }
+  // CurTok is the terminating token; it marks the end of the parameters.
+  Result.EndLoc = CurTok.getLocation();
+  Result.Successful = true;
+  return Result;
+}
+
+// Decimal spellings of every value from 0 to 255, indexed by that value.
+// HandleEmbedDirectiveNaive points numeric_constant tokens straight at these
+// strings (setLiteralData), so the array must survive for an extended period
+// of time: it has to outlive every token that refers to it.
+inline constexpr const char *IntegerLiterals[] = {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
+ "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
+ "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43",
+ "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54",
+ "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65",
+ "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76",
+ "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87",
+ "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98",
+ "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
+ "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120",
+ "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131",
+ "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142",
+ "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153",
+ "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164",
+ "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175",
+ "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186",
+ "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197",
+ "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208",
+ "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
+ "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230",
+ "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241",
+ "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
+ "253", "254", "255"};
+
+/// Expand a `#embed` directive by pushing a plain token sequence onto the
+/// token stream.
+///
+/// Empty \p BinaryContents: the `if_empty` tokens are entered when that
+/// parameter is present, otherwise nothing is entered.
+/// Non-empty \p BinaryContents: enters the `prefix` tokens (if any), one
+/// numeric_constant per byte separated by commas, then the `suffix` tokens
+/// (if any). All generated tokens are located at \p FilenameLoc.
+///
+/// \param TargetCharWidth  unused for now; reserved for targets whose
+///                         character width is not handled byte-per-token.
+void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
+                                             LexEmbedParametersResult &Params,
+                                             StringRef BinaryContents,
+                                             const size_t TargetCharWidth) {
+  (void)TargetCharWidth; // for later, when we support various sizes
+
+  if (BinaryContents.empty()) {
+    // An empty resource expands to the if_empty tokens, or to nothing.
+    if (!Params.MaybeIfEmptyParam)
+      return;
+    const size_t NumTokens = Params.MaybeIfEmptyParam->size();
+    std::unique_ptr<Token[]> Replacement(new Token[NumTokens]());
+    std::copy(Params.MaybeIfEmptyParam->begin(),
+              Params.MaybeIfEmptyParam->end(), Replacement.get());
+    EnterTokenStream(std::move(Replacement), NumTokens, true, true);
+    return;
+  }
+
+  // FIXME: this does not take the target's byte size into account;
+  // will fail on many DSPs and embedded machines!
+  const size_t NumPrefix =
+      Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0;
+  const size_t NumSuffix =
+      Params.MaybeSuffixParam ? Params.MaybeSuffixParam->size() : 0;
+  const size_t NumBytes = BinaryContents.size();
+  // One literal per byte plus one comma between each adjacent pair.
+  const size_t NumTokens = NumPrefix + (NumBytes * 2 - 1) + NumSuffix;
+
+  std::unique_ptr<Token[]> Expansion(new Token[NumTokens]());
+  Token *Out = Expansion.get();
+
+  if (Params.MaybePrefixParam)
+    Out = std::copy(Params.MaybePrefixParam->begin(),
+                    Params.MaybePrefixParam->end(), Out);
+
+  for (size_t I = 0; I != NumBytes; ++I) {
+    if (I != 0) {
+      // Separator between the previous byte and this one.
+      Out->setKind(tok::comma);
+      Out->setLocation(FilenameLoc);
+      ++Out;
+    }
+    const unsigned char ByteValue = BinaryContents[I];
+    StringRef Spelling = IntegerLiterals[ByteValue];
+    Out->setKind(tok::numeric_constant);
+    Out->setLiteralData(Spelling.data());
+    Out->setLength(Spelling.size());
+    Out->setLocation(FilenameLoc);
+    ++Out;
+  }
+
+  if (Params.MaybeSuffixParam)
+    Out = std::copy(Params.MaybeSuffixParam->begin(),
+                    Params.MaybeSuffixParam->end(), Out);
+
+  assert(Out == Expansion.get() + NumTokens);
+  EnterTokenStream(std::move(Expansion), NumTokens, true, false);
+}
+
+// Expand a #embed directive through the builtin path. No dedicated builtin
+// expansion exists yet, so every argument is forwarded unchanged to
+// HandleEmbedDirectiveNaive and the result is the same token sequence.
+void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
+ LexEmbedParametersResult &Params,
+ StringRef BinaryContents,
+ const size_t TargetCharWidth) {
+ // TODO: implement direct built-in support
+ HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+ TargetCharWidth);
+}
+
+/// Handle a `#embed` directive.
+///
+/// Steps: diagnose use outside C23 / C++26, lex the resource name and the
+/// directive parameters, locate the resource with LookupEmbedFile, read it
+/// (honouring `limit`, then `clang::offset`), notify PPCallbacks, and finally
+/// expand the contents through the naive or the builtin path depending on
+/// PreprocessorOptions::NoBuiltinPPEmbed.
+///
+/// \param HashLoc         location of the '#' that introduced the directive.
+/// \param EmbedTok        the `embed` directive-name token.
+/// \param LookupFromFile  file containing the directive, or null.
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // Warn only when neither C23 nor C++26 is in effect. A translation unit is
+  // never both, so testing the two flags with `||` would warn in every mode.
+  // NOTE(review): the diagnostic is now chosen to match the language (C++ ->
+  // cxx26, C -> c23); confirm against the diagnostic texts.
+  if (!LangOpts.C23 && !LangOpts.CPlusPlus26) {
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                          : diag::warn_c23_pp_embed);
+    Diag(EmbedTok, EitherDiag);
+  }
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  //   directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  //   directive-argument-list:
+  //     '(' balanced-token-sequence ')'
+  //   parameter-name-list:
+  //     '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  CurTok.startToken();
+  LexEmbedParametersResult Params = LexEmbedParameters(
+      CurTok, /*InHasEmbed=*/false, /*DiagnoseUnknown=*/true);
+  // Malformed parameters make the directive ill-formed: do not embed
+  // anything from a partially understood parameter list.
+  if (!Params.Successful)
+    return;
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  SmallString<512> SearchPath;
+  SmallString<512> RelativePath;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error. That is reachable from user input, so bail out instead of
+  // asserting.
+  if (Filename.empty())
+    return;
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, &SearchPath, &RelativePath);
+  if (!MaybeFileRef) {
+    // could not find file
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
+      return;
+    }
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+
+  // Only forward a limit that is representable as a signed 64-bit value;
+  // larger limits read the resource without a limit.
+  std::optional<int64_t> MaybeSignedLimit{};
+  if (Params.MaybeLimitParam) {
+    if (static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam) {
+      MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
+    }
+  }
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
+      getFileManager().getBufferForFile(*MaybeFileRef, false, false,
+                                        MaybeSignedLimit);
+  if (!MaybeFile) {
+    // could not read file
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile.get()->getBuffer();
+  if (Params.MaybeOffsetParam) {
+    // offsets all the way to the end of the file make for an empty file.
+    // NOTE(review): the offset is applied to the already limit-truncated
+    // buffer, so `limit(N) clang::offset(M)` yields at most N - M bytes —
+    // confirm this ordering is intended.
+    const size_t OffsetParam = *Params.MaybeOffsetParam;
+    BinaryContents = BinaryContents.substr(OffsetParam);
+  }
+  const size_t TargetCharWidth = getTargetInfo().getCharWidth();
+  if (TargetCharWidth > 64) {
+    // Too wide for us to handle
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "CHAR_BIT is too wide for the target architecture to handle "
+           "properly";
+    return;
+  }
+  if (TargetCharWidth != 8) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "At the moment, we do not have the machinery to support non 8-bit "
+           "CHAR_BIT targets!";
+    return;
+  }
+  // TargetCharWidth is 8 from here on, so this checks the host's CHAR_BIT.
+  if (CHAR_BIT % TargetCharWidth != 0) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "CHAR_BIT is not evenly divisible by host architecture's byte "
+           "definition";
+    return;
+  }
+  if (Callbacks) {
+    CharSourceRange FilenameSourceRange(
+        SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true);
+    CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc),
+                                    true);
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange,
+                              ParametersRange, MaybeFileRef, SearchPath,
+                              RelativePath);
+  }
+  if (PPOpts->NoBuiltinPPEmbed) {
+    HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                              TargetCharWidth);
+  } else {
+    // emit a token directly, handle it internally.
+    HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+                                TargetCharWidth);
+  }
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 269984aae07bf28..dda5717afc699da 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -868,7 +868,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
/// may occur after a #if or #elif directive. If the expression is equivalent
/// to "!defined(X)" return X in IfNDefMacro.
Preprocessor::DirectiveEvalResult
-Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ Token &Tok, bool CheckForEoD,
+ bool Parenthesized) {
SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
// Save the current state of 'DisableMacroExpansion' and reset it to false. If
// 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -880,7 +882,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
DisableMacroExpansion = false;
// Peek ahead one token.
- Token Tok;
LexNonComment(Tok);
// C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
@@ -901,7 +902,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// We cannot trust the source range from the value because there was a
// parse error. Track the range manually -- the end of the directive is the
// end of the condition range.
- return {false,
+ return {std::nullopt,
+ false,
DT.IncludedUndefinedIds,
{ExprStartLoc, ConditionRange.getEnd()}};
}
@@ -917,7 +919,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+ const bool IsNonZero = ResVal.Val != 0;
+ const SourceRange ValRange = ResVal.getRange();
+ return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+ ValRange};
}
// Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
@@ -930,17 +935,34 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+ // This return reported `false` unconditionally before; keep that result and
+ // report no value, in the same shape as the parse-error return earlier in
+ // this function, so callers testing `!Result.Value` can detect the failure.
+ // NOTE(review): assumes this is the sub-expression parse-error path (the
+ // enclosing condition lies outside this hunk) — confirm.
+ return {std::nullopt, false, DT.IncludedUndefinedIds, ResVal.getRange()};
}
- // If we aren't at the tok::eod token, something bad happened, like an extra
- // ')' token.
- if (Tok.isNot(tok::eod)) {
- Diag(Tok, diag::err_pp_expected_eol);
- DiscardUntilEndOfDirective();
+ if (CheckForEoD) {
+ // If we aren't at the tok::eod token, something bad happened, like an extra
+ // ')' token.
+ if (Tok.isNot(tok::eod)) {
+ Diag(Tok, diag::err_pp_expected_eol);
+ DiscardUntilEndOfDirective();
+ }
}
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+ const bool IsNonZero = ResVal.Val != 0;
+ const SourceRange ValRange = ResVal.getRange();
+ return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive. If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.
+Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(
+ IdentifierInfo *&IfNDefMacro, bool CheckForEoD, bool Parenthesized) {
+ Token Tok;
+ return EvaluateDirectiveExpression(IfNDefMacro, Tok, CheckForEoD,
+ Parenthesized);
}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b371f8cf7a9c072..6e0163ccc89b7fb 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
Ident__has_c_attribute = nullptr;
Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+ Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1264,6 +1265,114 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
return File.has_value();
}
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+/// Returns one of the VALUE__STDC_EMBED_{NOT_FOUND,FOUND,EMPTY}__ constants.
+int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+ // pedwarn when the active language mode predates C23 (C) or C++26 (C++).
+ if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23) {
+ auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+ : diag::warn_c23_pp_has_embed);
+ Diag(Tok, EitherDiag);
+ }
+
+ // Save the location of the current token. If a '(' is later found, use
+ // that location. If not, use the end of this location instead.
+ SourceLocation LParenLoc = Tok.getLocation();
+
+ // These expressions are only allowed within a preprocessor directive.
+ if (!this->isParsingIfOrElifDirective()) {
+ Diag(LParenLoc, diag::err_pp_directive_required) << II;
+ // Return a valid identifier token.
+ assert(Tok.is(tok::identifier));
+ Tok.setIdentifierInfo(II);
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+
+ // Get '('. If we don't have a '(', try to form a header-name token.
+ do {
+ if (this->LexHeaderName(Tok)) {
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+ } while (Tok.getKind() == tok::comment);
+
+ // Ensure we have a '('.
+ if (Tok.isNot(tok::l_paren)) {
+ // No '(', use end of last token.
+ LParenLoc = this->getLocForEndOfToken(LParenLoc);
+ this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
+ // If the next token looks like a filename or the start of one,
+ // assume it is and process it as such.
+ if (Tok.isNot(tok::header_name)) {
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+ } else {
+ // Save '(' location for possible missing ')' message.
+ LParenLoc = Tok.getLocation();
+ if (this->LexHeaderName(Tok)) {
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+ }
+
+ if (Tok.isNot(tok::header_name)) {
+ Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+
+ SourceLocation FilenameLoc = Tok.getLocation();
+ Token FilenameTok = Tok;
+
+ Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
+ if (!Params.Successful) {
+ if (Tok.isNot(tok::eod))
+ this->DiscardUntilEndOfDirective();
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+ if (Params.UnrecognizedParams > 0) {
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+
+ if (!Tok.is(tok::r_paren)) {
+ Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+ << II << tok::r_paren;
+ Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ DiscardUntilEndOfDirective();
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+
+
+ SmallString<128> FilenameBuffer;
+ SmallString<256> RelativePath;
+ StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
+ StringRef OriginalFilename = Filename;
+ bool isAngled =
+ this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+ // An empty name means GetIncludeFilenameSpelling hit an error; bail out.
+ if (Filename.empty())
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ const FileEntry *LookupFromFile =
+ this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
+ : nullptr;
+ OptionalFileEntryRef MaybeFileEntry =
+ this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+ LookupFromFile, nullptr,
+ &RelativePath);
+ if (Callbacks) {
+ Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+ }
+ if (!MaybeFileEntry) {
+ return VALUE__STDC_EMBED_NOT_FOUND__;
+ }
+ size_t FileSize = MaybeFileEntry->getSize();
+ if (FileSize == 0 ||
+ (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) {
+ return VALUE__STDC_EMBED_EMPTY__;
+ }
+ if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) {
+ return VALUE__STDC_EMBED_EMPTY__;
+ }
+ return VALUE__STDC_EMBED_FOUND__;
+}
+
bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
}
@@ -1801,6 +1910,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
return;
OS << (int)Value;
Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__has_embed) {
+ // The argument to this builtin should be a parenthesized
+ // file name string literal using angle brackets (<>) or
+ // double-quotes (""), optionally followed by a series of
+ // embed parameters written in an attribute-like form.
+ int Value = EvaluateHasEmbed(Tok, II);
+
+ if (Tok.isNot(tok::r_paren))
+ return;
+ OS << Value;
+ Tok.setKind(tok::numeric_constant);
} else if (II == Ident__has_warning) {
// The argument should be a parenthesized string literal.
EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 000000000000000..93d177a48c83ab8
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk
\ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 000000000000000..1ce9ab967e4a154
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+ __ _
+ .-.' `; `-._ __ _
+ (_, .-:' `; `-._
+ ,'o"( (_, )
+ (__,-' ,'o"( )>
+ ( (__,-' )
+ `-'._.--._( )
+ ||| |||`-'._.--._.-'
+ ||| |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 000000000000000..63d8dbd40c23542
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b
\ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 000000000000000..80980e753614a5d
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed("media/art.txt")
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk")
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1))
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1))
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD"))
+#error 6
+#elif !__has_embed(__FILE__ limit(2) prefix(y))
+#error 7
+#elif !__has_embed(__FILE__ limit(2))
+#error 8
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x))
+#error 9
+#elif __has_embed(<media/empty>) != 2
+#error 10
+#elif __has_embed(<media/empty> limit(0)) != 2
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != 2
+#error 12
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(1)) != 2
+#error 13
+#elif !__has_embed(<media/art.txt>)
+#error 14
+#elif !__has_embed(<media/art.txt> if_empty(meow))
+#error 15
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 000000000000000..fe0edb00e609837
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed(__FILE__)
+#error 2
+#elif !__has_embed(__FILE__ suffix(x))
+#error 3
+#elif !__has_embed(__FILE__ suffix(x) limit(1))
+#error 4
+#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1))
+#error 5
+#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1))
+#error 6
+#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1))
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 000000000000000..46787041ca23bec
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+
+#if defined(__cplusplus)
+#if !defined(__cpp_pp_embed) || __cpp_pp_embed != 202403L
+#error 1
+#endif
+#endif
+
+#if !defined(__has_embed)
+#error 2
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c
new file mode 100644
index 000000000000000..337fa4ac067ec71
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed <nfejfNejAKFe>
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 000000000000000..cd517b7f216ac32
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+typedef struct kitty {
+ int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+ int here;
+ kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+_Static_assert(meow == 'b', "");
+_Static_assert(kit.purr == 'b', "");
+_Static_assert(kit_kit.here == 'j', "");
+_Static_assert(kit_kit.kit.purr == 'k', "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 000000000000000..ac1a768b27ffff9
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+_Static_assert(sizeof(data) == 3, "");
+_Static_assert(123 == data[0], "");
+_Static_assert(124 == data[1], "");
+_Static_assert(125 == data[2], "");
+_Static_assert(sizeof(non_empty_data) == 2, "");
+_Static_assert('j' == non_empty_data[0], "");
+_Static_assert('k' == non_empty_data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 000000000000000..28a94fe9430f033
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> limit(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(offset_data) == 1, "");
+_Static_assert('j' == offset_data[0], "");
+_Static_assert(offset_data[0] == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 000000000000000..71a029544dca556
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(offset_data) == 1, "");
+_Static_assert('k' == offset_data[0], "");
+_Static_assert(offset_data[0] == data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 000000000000000..5182a2b874d3991
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('\xA' == data[0], "");
+_Static_assert('b' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 000000000000000..11c3f2bbbfb2bb6
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('b' == data[0], "");
+_Static_assert('\xA' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 000000000000000..1f043ccd2ff54bf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed __FILE__ unrecognized
+// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized' ignored}}
+#embed __FILE__ unrecognized::param
+// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 000000000000000..5c33871c0c8a4d8
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('b' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 000000000000000..791cd9176ebe0ab
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('a' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 000000000000000..2e65efe2a145dda
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
\ No newline at end of file
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 103c08ffbe83b38..8f9d7c77ccd150f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -777,6 +777,13 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
"Semicolon-separated list of components to include in libLLVM, or \"all\".")
endif()
+option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use of specific functions in Microsoft's STL." ON)
+# When the option is OFF, define the macros that quiet MSVC-style secure/non-standard CRT warnings.
+if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
+ add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
+endif()
+
+
if(MSVC)
option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
# Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 1be13bc01ab9b25..828227f2f25a2f0 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
# Invokes config.guess
function( get_host_triple var )
- if( MSVC )
+ if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
set( value "aarch64-pc-windows-msvc" )
elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
else()
set( value "powerpc-ibm-aix" )
endif()
- else( MSVC )
+ else()
if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
message(WARNING "unable to determine host target triple")
else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
endif( NOT TT_RV EQUAL 0 )
set( value ${TT_OUT} )
endif()
- endif( MSVC )
+ endif()
set( ${var} ${value} PARENT_SCOPE )
endfunction( get_host_triple var )
>From 6a7a4c959f1635f5c3549010d277b5834a3e3fe2 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Sun, 8 Oct 2023 17:43:51 -0400
Subject: [PATCH 02/29] =?UTF-8?q?=E2=9C=A8=20Speedy=20#embed=20implementat?=
=?UTF-8?q?ion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
⚡ [Lex] Better reservations for improved performance/memory usage.
🛠 [Lex, Frontend] Remove comma hardcoding since we are servicing a full file
apply suggestions from git-clang-format
---
clang/include/clang/AST/Expr.h | 51 ++
clang/include/clang/AST/RecursiveASTVisitor.h | 1 +
.../clang/Basic/DiagnosticCommonKinds.td | 6 +
clang/include/clang/Basic/FileManager.h | 5 +-
clang/include/clang/Basic/StmtNodes.td | 1 +
clang/include/clang/Basic/TokenKinds.def | 6 +-
.../Frontend/PreprocessorOutputOptions.h | 3 +-
.../include/clang/Lex/PPDirectiveParameter.h | 32 ++
clang/include/clang/Lex/PPEmbedParameters.h | 78 ++++
clang/include/clang/Lex/Preprocessor.h | 42 +-
clang/include/clang/Sema/Sema.h | 37 ++
.../include/clang/Serialization/ASTBitCodes.h | 3 +
clang/lib/AST/Expr.cpp | 16 +
clang/lib/AST/ExprClassification.cpp | 5 +
clang/lib/AST/ExprConstant.cpp | 8 +
clang/lib/AST/ItaniumMangle.cpp | 1 +
clang/lib/AST/StmtPrinter.cpp | 7 +
clang/lib/AST/StmtProfile.cpp | 2 +
clang/lib/Basic/FileManager.cpp | 1 -
clang/lib/Basic/IdentifierTable.cpp | 6 +-
clang/lib/Driver/ToolChains/Clang.cpp | 3 +-
clang/lib/Format/TokenAnnotator.cpp | 3 +-
clang/lib/Frontend/DependencyFile.cpp | 15 +-
clang/lib/Frontend/DependencyGraph.cpp | 2 +-
.../lib/Frontend/PrintPreprocessedOutput.cpp | 14 +-
clang/lib/Interpreter/Interpreter.cpp | 1 +
clang/lib/Lex/Lexer.cpp | 8 +
clang/lib/Lex/PPDirectives.cpp | 434 ++++++++++++++----
clang/lib/Lex/PPMacroExpansion.cpp | 23 +-
clang/lib/Lex/Preprocessor.cpp | 6 +-
clang/lib/Parse/ParseExpr.cpp | 104 +++++
clang/lib/Parse/ParseTemplate.cpp | 2 +
clang/lib/Sema/SemaDecl.cpp | 48 ++
clang/lib/Sema/SemaDeclCXX.cpp | 3 +-
clang/lib/Sema/SemaExceptionSpec.cpp | 1 +
clang/lib/Sema/SemaExpr.cpp | 239 +++++++++-
clang/lib/Sema/SemaTemplate.cpp | 56 +++
clang/lib/Sema/TreeTransform.h | 6 +
clang/lib/Serialization/ASTReaderStmt.cpp | 13 +
clang/lib/Serialization/ASTWriterStmt.cpp | 10 +
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 4 +
clang/test/Preprocessor/embed_art.c | 106 +++++
clang/test/Preprocessor/embed_single_entity.c | 7 +
clang/test/Preprocessor/embed_weird.cpp | 68 +++
llvm/include/llvm/Support/Base64.h | 36 +-
45 files changed, 1351 insertions(+), 172 deletions(-)
create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h
create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h
create mode 100644 clang/test/Preprocessor/embed_art.c
create mode 100644 clang/test/Preprocessor/embed_single_entity.c
create mode 100644 clang/test/Preprocessor/embed_weird.cpp
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index b69c616b0090365..d3fba205c91c934 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4805,6 +4805,57 @@ class SourceLocExpr final : public Expr {
friend class ASTStmtReader;
};
+/// Represents a function call to __builtin_pp_embed().
+class PPEmbedExpr final : public Expr {
+ SourceLocation BuiltinLoc, RParenLoc;
+ DeclContext *ParentContext;
+ StringLiteral *Filename;
+ StringLiteral *BinaryData;
+
+public:
+ enum Action {
+ NotFound,
+ FoundOne,
+ Expanded,
+ };
+
+ PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename,
+ StringLiteral *BinaryData, SourceLocation BLoc,
+ SourceLocation RParenLoc, DeclContext *Context);
+
+ /// Build an empty call expression.
+ explicit PPEmbedExpr(EmptyShell Empty) : Expr(PPEmbedExprClass, Empty) {}
+
+ /// Accessors for the declaration context stored in ParentContext
+ /// (presumably the DeclContext passed to the constructor -- confirm).
+ const DeclContext *getParentContext() const { return ParentContext; }
+ DeclContext *getParentContext() { return ParentContext; }
+
+ SourceLocation getLocation() const { return BuiltinLoc; }
+ SourceLocation getBeginLoc() const { return BuiltinLoc; }
+ SourceLocation getEndLoc() const { return RParenLoc; }
+
+ StringLiteral *getFilenameStringLiteral() const { return Filename; }
+ StringLiteral *getDataStringLiteral() const { return BinaryData; }
+
+ size_t getDataElementCount(ASTContext &Context) const;
+
+ child_range children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+
+ const_child_range children() const {
+ return const_child_range(child_iterator(), child_iterator());
+ }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == PPEmbedExprClass;
+ }
+
+private:
+ friend class ASTStmtReader;
+};
+
/// Describes an C or C++ initializer list.
///
/// InitListExpr describes an initializer list, which can be used to
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3dd23eb38eeabfc..6b7211bb0a0d3f1 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2809,6 +2809,7 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
DEF_TRAVERSE_STMT(StmtExpr, {})
DEF_TRAVERSE_STMT(SourceLocExpr, {})
+DEF_TRAVERSE_STMT(PPEmbedExpr, {})
DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index f2df283c74829f6..4df86e35eebde38 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -59,6 +59,9 @@ def err_expected_string_literal : Error<"expected string literal "
"'external_source_symbol' attribute|"
"as argument of '%1' attribute}0">;
+def err_builtin_pp_embed_invalid_argument : Error<
+ "invalid argument to '__builtin_pp_embed': %0">;
+
def err_invalid_string_udl : Error<
"string literal with user-defined suffix cannot be used here">;
def err_invalid_character_udl : Error<
@@ -80,6 +83,9 @@ def err_expected : Error<"expected %0">;
def err_expected_either : Error<"expected %0 or %1">;
def err_expected_after : Error<"expected %1 after %0">;
+def err_builtin_pp_embed_invalid_location : Error<
+ "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">;
+
def err_param_redefinition : Error<"redefinition of parameter %0">;
def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">;
def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">,
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index c757f8775b425e9..cbfcb292778e5f7 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -282,8 +282,9 @@ class FileManager : public RefCountedBase<FileManager> {
getBufferForFile(StringRef Filename, bool isVolatile = false,
bool RequiresNullTerminator = true,
std::optional<int64_t> MaybeLimit = std::nullopt) {
- return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
- RequiresNullTerminator);
+ return getBufferForFileImpl(Filename,
+ /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1),
+ isVolatile, RequiresNullTerminator);
}
private:
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index cec301dfca2817b..e3be997dd1c86e0 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -203,6 +203,7 @@ def OpaqueValueExpr : StmtNode<Expr>;
def TypoExpr : StmtNode<Expr>;
def RecoveryExpr : StmtNode<Expr>;
def BuiltinBitCastExpr : StmtNode<ExplicitCastExpr>;
+def PPEmbedExpr : StmtNode<Expr>;
// Microsoft Extensions.
def MSPropertyRefExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 19a66fbb0731194..167bd614efe7bd9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -154,10 +154,6 @@ TOK(eod) // End of preprocessing directive (end of line inside a
// directive).
TOK(code_completion) // Code completion marker
-// #embed speed support
-TOK(builtin_embed)
-
-
// C99 6.4.9: Comments.
TOK(comment) // Comment (only in -E -C[C] mode)
@@ -758,6 +754,7 @@ ALIAS("__char32_t" , char32_t , KEYCXX)
KEYWORD(__builtin_bit_cast , KEYALL)
KEYWORD(__builtin_available , KEYALL)
KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
+KEYWORD(__builtin_pp_embed , KEYALL)
// Keywords defined by Attr.td.
#ifndef KEYWORD_ATTRIBUTE
@@ -993,6 +990,7 @@ ANNOTATION(repl_input_end)
#undef CXX11_KEYWORD
#undef KEYWORD
#undef PUNCTUATOR
+#undef BUILTINOK
#undef TOK
#undef C99_KEYWORD
#undef C23_KEYWORD
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 3e36db3f8ce46ea..0bc32c65a58d2d8 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,7 +22,8 @@ class PreprocessorOutputOptions {
unsigned ShowMacroComments : 1; ///< Show comments, even in macros.
unsigned ShowMacros : 1; ///< Print macro definitions.
unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output.
- unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output.
+ unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed
+ ///< output.
unsigned RewriteIncludes : 1; ///< Preprocess include directives only.
unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules.
unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h
new file mode 100644
index 000000000000000..fc413c345adc539
--- /dev/null
+++ b/clang/include/clang/Lex/PPDirectiveParameter.h
@@ -0,0 +1,32 @@
+//===--- PPDirectiveParameter.h - Directive parameter info ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PPDirectiveParameter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+
+/// Captures basic information about a preprocessor directive parameter.
+class PPDirectiveParameter {
+public:
+ SourceLocation Start;
+ SourceLocation End;
+
+ PPDirectiveParameter(SourceLocation Start, SourceLocation End)
+ : Start(Start), End(End) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
new file mode 100644
index 000000000000000..7b76d2d573c23bd
--- /dev/null
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -0,0 +1,78 @@
+//===--- PPEmbedParameters.h - Embed directive parameters -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the classes describing #embed directive parameters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+
+#include "clang/Lex/PPDirectiveParameter.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+/// Preprocessor extension embed parameter "clang::offset"
+/// `clang::offset( constant-expression )`
+class PPEmbedParameterOffset : public PPDirectiveParameter {
+public:
+ size_t Offset;
+
+ PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
+ SourceLocation End)
+ : PPDirectiveParameter(Start, End), Offset(Offset) {}
+};
+
+/// Preprocessor standard embed parameter "limit"
+/// `limit( constant-expression )`
+class PPEmbedParameterLimit : public PPDirectiveParameter {
+public:
+ size_t Limit;
+
+ PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
+ : PPDirectiveParameter(Start, End), Limit(Limit) {}
+};
+
+/// Preprocessor standard embed parameter "prefix"
+/// `prefix( balanced-token-seq )`
+class PPEmbedParameterPrefix : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ SourceLocation End)
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "suffix"
+/// `suffix( balanced-token-seq )`
+class PPEmbedParameterSuffix : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ SourceLocation End)
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "if_empty"
+/// `if_empty( balanced-token-seq )`
+class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ SourceLocation End)
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 7470bf5882730cb..58012fb79559e22 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,6 +29,7 @@
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PPEmbedParameters.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/APSInt.h"
@@ -1165,6 +1166,9 @@ class Preprocessor {
void updateOutOfDateIdentifier(IdentifierInfo &II) const;
+ /// Buffers for used #embed directives
+ std::vector<std::string> EmbedBuffers;
+
public:
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, const LangOptions &LangOpts,
@@ -1735,15 +1739,15 @@ class Preprocessor {
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
+ /// Everything LexEmbedParameters gathers from the parameter list of an
+ /// #embed directive or a __has_embed expression.
struct LexEmbedParametersResult {
- bool Successful;
- std::optional<size_t> MaybeLimitParam;
- std::optional<size_t> MaybeOffsetParam;
- std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
- std::optional<SmallVector<Token, 2>> MaybePrefixParam;
- std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
- int UnrecognizedParams;
+ /// Recognized parameters; each optional stays empty unless the parameter
+ /// appeared in the list.
+ std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+ std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+ std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+ std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+ std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
+ /// Source range covered by the parameter list; EndLoc is also updated on
+ /// the early-return error paths.
SourceLocation StartLoc;
SourceLocation EndLoc;
+ /// Number of parameters whose name was not recognized.
+ int UnrecognizedParams;
+ /// Set to true only when the whole list was lexed without an early return.
+ bool Successful;
};
LexEmbedParametersResult LexEmbedParameters(Token &Current,
@@ -1812,7 +1816,8 @@ class Preprocessor {
/// Parses a simple integer literal to get its numeric value. Floating
/// point literals and user defined literals are rejected. Used primarily to
/// handle pragmas that accept integer arguments.
- bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
+ bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+ bool WithLex = true);
/// Disables macro expansion everywhere except for preprocessor directives.
void SetMacroExpansionOnlyInDirectives() {
@@ -2441,8 +2446,7 @@ class Preprocessor {
/// reference is for system \#include's or not (i.e. using <> instead of "").
OptionalFileEntryRef
LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
- bool OpenFile,
- const FileEntry *LookupFromFile = nullptr,
+ bool OpenFile, const FileEntry *LookupFromFile = nullptr,
SmallVectorImpl<char> *SearchPath = nullptr,
SmallVectorImpl<char> *RelativePath = nullptr);
@@ -2735,12 +2739,18 @@ class Preprocessor {
// Binary data inclusion
void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
const FileEntry *LookupFromFile = nullptr);
- void HandleEmbedDirectiveNaive(
- SourceLocation FilenameTok, LexEmbedParametersResult &Params,
- StringRef BinaryContents, const size_t TargetCharWidth);
- void HandleEmbedDirectiveBuiltin(
- SourceLocation FilenameTok, LexEmbedParametersResult &Params,
- StringRef BinaryContents, const size_t TargetCharWidth);
+ void HandleEmbedDirectiveNaive(SourceLocation HashLoc,
+ SourceLocation FilenameTok,
+ const LexEmbedParametersResult &Params,
+ StringRef BinaryContents,
+ const size_t TargetCharWidth);
+ void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc,
+ const Token &FilenameTok,
+ StringRef ResolvedFilename,
+ StringRef SearchPath, StringRef RelativePath,
+ const LexEmbedParametersResult &Params,
+ StringRef BinaryContents,
+ const size_t TargetCharWidth);
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2ebd21090ae4e11..d3c62d8e75650eb 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5981,6 +5981,10 @@ class Sema final {
ArrayRef<Expr *> Arg, SourceLocation RParenLoc,
Expr *Config = nullptr, bool IsExecConfig = false,
ADLCallKind UsesADL = ADLCallKind::NotADL);
+ /// `Fn` may be a null pointer.
+ void ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+ SmallVectorImpl<Expr *> &ArgExprs,
+ SourceLocation RParenLoc);
ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
MultiExprArg ExecConfig,
@@ -6098,6 +6102,35 @@ class Sema final {
SourceLocation BuiltinLoc,
SourceLocation RPLoc);
+ // __builtin_pp_embed()
+ ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+ SourceLocation Base64DataLocation,
+ SourceLocation RPLoc, StringLiteral *Filename,
+ QualType DataTy, std::vector<char> BinaryData);
+
+ IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed);
+
+ PPEmbedExpr::Action
+ CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+ std::optional<QualType> MaybeInitType);
+ PPEmbedExpr::Action
+ ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+ SmallVectorImpl<Expr *> &OutputExprList,
+ bool ClearOutputFirst = true);
+ PPEmbedExpr::Action
+ ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &OutputList);
+
+ // Names the construct a PPEmbedExpr was found in; taken by GetLocationName
+ // and DiagnosePPEmbedExpr.
+ // NOTE(review): presumably PPEEC__StaticAssert stands for C `_Static_assert`
+ // and PPEEC_StaticAssert for `static_assert` — confirm; the names differ
+ // only by one underscore and are easy to mix up.
+ enum PPEmbedExprContext {
+ PPEEC__StaticAssert,
+ PPEEC_StaticAssert,
+ };
+
+ StringRef GetLocationName(PPEmbedExprContext Context) const;
+
+ bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+ PPEmbedExprContext Context,
+ bool SingleAllowed = true);
+
// Build a potentially resolved SourceLocExpr.
ExprResult BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
QualType ResultTy, SourceLocation BuiltinLoc,
@@ -8290,6 +8323,10 @@ class Sema final {
SourceLocation EqualLoc,
ParsedTemplateArgument DefaultArg);
+ void ModifyTemplateArguments(
+ const TemplateTy &Template,
+ SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs);
+
TemplateParameterList *
ActOnTemplateParameterList(unsigned Depth,
SourceLocation ExportLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5c32fbc079c9a65..138c52bc8149fc8 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1715,6 +1715,9 @@ enum StmtCode {
/// A SourceLocExpr record.
EXPR_SOURCE_LOC,
+ /// A PPEmbedExpr record.
+ EXPR_BUILTIN_PP_EMBED,
+
/// A ShuffleVectorExpr record.
EXPR_SHUFFLE_VECTOR,
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 4bfc4f082cd6a69..f0c0359cd9feaf9 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2392,6 +2392,21 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
llvm_unreachable("unhandled case");
}
+// Builds a PPEmbedExpr: an ordinary prvalue of type ResultTy, marked as
+// non-dependent, that records the builtin and closing-paren locations, the
+// parent declaration context, and the filename and binary-data literals.
+PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy,
+ StringLiteral *Filename, StringLiteral *BinaryData,
+ SourceLocation BLoc, SourceLocation RParenLoc,
+ DeclContext *ParentContext)
+ : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary),
+ BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext),
+ Filename(Filename), BinaryData(BinaryData) {
+ setDependence(ExprDependence::None);
+}
+
+/// Number of elements of this expression's type stored in the data literal:
+/// the literal's byte length divided by the element width measured in chars.
+size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const {
+  const auto CharsPerElement =
+      Context.getTypeSize(getType()) / Context.getTypeSize(Context.CharTy);
+  return getDataStringLiteral()->getByteLength() / CharsPerElement;
+}
+
InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
ArrayRef<Expr *> initExprs, SourceLocation rbraceloc)
: Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary),
@@ -3610,6 +3625,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case CXXUuidofExprClass:
case OpaqueValueExprClass:
case SourceLocExprClass:
+ case PPEmbedExprClass:
case ConceptSpecializationExprClass:
case RequiresExprClass:
case SYCLUniqueStableNameExprClass:
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index ffa7c6802ea6e19..fbbbd72b1445716 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
case Expr::RequiresExprClass:
return Cl::CL_PRValue;
+ case Expr::PPEmbedExprClass:
+ // Nominally, this just goes through as a PRValue until we actually expand
+ // it and check it.
+ return Cl::CL_PRValue;
+
// Make HLSL this reference-like
case Expr::CXXThisExprClass:
return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5539dedec02a4b..b6967cc97d78c5d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -8921,6 +8921,11 @@ class PointerExprEvaluator
return true;
}
+ // Placeholder: pointer evaluation of a PPEmbedExpr is not implemented, and
+ // reaching this visitor is treated as a programming error.
+ // NOTE(review): the `return true` after llvm_unreachable looks like dead
+ // code — confirm, and confirm this path cannot be reached from user code.
+ bool VisitPPEmbedExpr(const PPEmbedExpr *E) {
+ llvm_unreachable("Not yet implemented for ExprConstant.cpp");
+ return true;
+ }
+
bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
std::string ResultStr = E->ComputeName(Info.Ctx);
@@ -16166,6 +16171,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
return ICEDiag(IK_NotICE, E->getBeginLoc());
return CheckICE(cast<CastExpr>(E)->getSubExpr(), Ctx);
}
+ case Expr::PPEmbedExprClass: {
+ return ICEDiag(IK_ICE, E->getBeginLoc());
+ }
}
llvm_unreachable("Invalid StmtClass!");
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 23ec35cae4b7b40..f08fb766efd777d 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4721,6 +4721,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
case Expr::PseudoObjectExprClass:
case Expr::AtomicExprClass:
case Expr::SourceLocExprClass:
+ case Expr::PPEmbedExprClass:
case Expr::BuiltinBitCastExprClass:
{
NotPrimaryExpr();
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index a31aa0cfeeed8de..f94386be7788474 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -49,6 +49,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Base64.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1145,6 +1146,12 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
OS << Node->getBuiltinStr() << "()";
}
+// Prints the node in call form:
+// `__builtin_pp_embed(<type>, <filename bytes>, "<base64 of the data bytes>")`.
+// NOTE(review): the filename bytes are streamed without surrounding quotes
+// while the data is quoted explicitly — confirm the asymmetry is intended.
+void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) {
+ OS << "__builtin_pp_embed(" << Node->getType() << ", "
+ << Node->getFilenameStringLiteral()->getBytes() << ", \""
+ << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")";
+}
+
void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
PrintExpr(Node->getSubExpr());
}
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 22b6855b0fff23c..0be044f54a819ee 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2284,6 +2284,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) {
VisitExpr(E);
}
+// Only forwards to VisitExpr; the filename and data literals are not visited
+// here. NOTE(review): confirm that embeds of different data may profile alike.
+void StmtProfiler::VisitPPEmbedExpr(const PPEmbedExpr *E) { VisitExpr(E); }
+
void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); }
void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) {
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index e0e80b5e0fbedbe..d8a5b56438ad33d 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -549,7 +549,6 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
if (MaybeLimit)
FileSize = *MaybeLimit;
-
// If there's a high enough chance that the file have changed since we
// got its size, force a stat before opening it.
if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d2b5426d27bb3b2..96ac3663ca6658b 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -422,8 +422,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
// collisions (if there were, the switch below would complain about duplicate
// case values). Note that this depends on 'if' being null terminated.
-#define HASH(LEN, FIRST, THIRD) \
- (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
+#define HASH(LEN, FIRST, THIRD) \
+ (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
#define CASE(LEN, FIRST, THIRD, NAME) \
case HASH(LEN, FIRST, THIRD): \
return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,7 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
CASE( 4, 'e', 's', else);
CASE( 4, 'l', 'n', line);
CASE( 4, 's', 'c', sccs);
- CASE( 5, 'e', 'b', embed);
+ CASE(5, 'e', 'b', embed);
CASE( 5, 'e', 'd', endif);
CASE( 5, 'e', 'r', error);
CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fc2f749a34fc471..53a92502b463b57 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
Args.addAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
- options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
+ options::OPT_F, options::OPT_index_header_map,
+ options::OPT_EmbedPath_Group});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index e405a9085951dc0..0a3c16f3a669c70 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1399,8 +1399,7 @@ class AnnotatingParser {
if (Tok->isOneOf(Keywords.kw___has_include,
Keywords.kw___has_include_next)) {
parseHasInclude();
- }
- else if (Tok->is(Keywords.kw___has_embed)) {
+ } else if (Tok->is(Keywords.kw___has_embed)) {
parseHasEmbed();
}
if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 10558b1d34bf623..04ddb92ff7f7b67 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,11 +65,11 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
/*IsMissing=*/false);
}
- void EmbedDirective(SourceLocation HashLoc,
- StringRef FileName, bool IsAngled,
- CharSourceRange FilenameRange, CharSourceRange ParametersRange,
- OptionalFileEntryRef File, StringRef SearchPath,
- StringRef RelativePath) override {
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ CharSourceRange ParametersRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath) override {
if (!File)
DepCollector.maybeAddDependency(FileName,
/*FromModule*/ false,
@@ -97,14 +97,13 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
}
void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
- OptionalFileEntryRef File) override {
+ OptionalFileEntryRef File) override {
if (!File)
return;
StringRef Filename =
llvm::sys::path::remove_leading_dotslash(File->getName());
DepCollector.maybeAddDependency(Filename,
- /*FromModule=*/false,
- false,
+ /*FromModule=*/false, false,
/*IsModuleFile=*/false,
&PP.getFileManager(),
/*IsMissing=*/false);
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 683f751a94244ec..4049a5245de7d34 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -53,7 +53,7 @@ class DependencyGraphCallback : public PPCallbacks {
DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
StringRef SysRoot,
DirectiveBehavior Action = IgnoreEmbed)
- : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+ : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index fb9baa92e6836d3..1d93ad97305da87 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -107,9 +107,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
- bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
- bool UseLineDirectives, bool MinimizeWhitespace,
- bool DirectivesOnly, bool KeepSystemIncludes)
+ bool defines, bool DumpIncludeDirectives,
+ bool DumpEmbedDirectives, bool UseLineDirectives,
+ bool MinimizeWhitespace, bool DirectivesOnly,
+ bool KeepSystemIncludes)
: PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
DisableLineMarkers(lineMarkers), DumpDefines(defines),
DumpIncludeDirectives(DumpIncludeDirectives),
@@ -414,7 +415,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective(
if (DumpEmbedDirectives) {
MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
*OS << "#embed " << (IsAngled ? '<' : '"') << FileName
- << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+ << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
setEmittedDirectiveOnThisLine();
}
}
@@ -1002,8 +1003,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
- Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
- Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+ Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+ Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+ Opts.KeepSystemIncludes);
// Expand macros in pragmas with -fms-extensions. The assumption is that
// the majority of pragmas in such a file will be Microsoft pragmas.
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 7968c62cbd3e7b3..e2e55daa77b854a 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -566,6 +566,7 @@ class RuntimeInterfaceBuilder
CStyleCastPtrExpr(S, Ctx.VoidPtrTy, (uintptr_t)Ty.getAsOpaquePtr());
// The QualType parameter `OpaqueType`, represented as `void*`.
Args.push_back(TypeArg);
+ S.ModifyCallExprArguments(nullptr, E->getBeginLoc(), Args, E->getEndLoc());
// We push the last parameter based on the type of the Expr. Note we need
// special care for rvalue struct.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index feed1b9ecd71a8d..b55b4c360d44298 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -417,6 +417,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
}
}
+ // NOTE: this is to prevent a few cases where token streams with
+ // commas are used to print with pseudo-locations after a faux-expansion
+ // cause reading a bogus location from a source file that does not exist.
+ if (Tok.is(tok::comma)) {
+ Buffer = ",";
+ return 1;
+ }
+
// NOTE: this can be checked even after testing for an IdentifierInfo.
if (Tok.isLiteral())
TokStart = Tok.getLiteralData();
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e0d98d7ca03fa11..1696c1a40c3d46b 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -42,11 +42,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Base64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
#include <cassert>
+#include <cmath>
#include <cstring>
#include <new>
#include <optional>
@@ -3631,10 +3633,12 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
SmallVector<Token, 2> ParameterTokens;
tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
Result.StartLoc = CurTok.getLocation();
+ Result.EndLoc = CurTok.getLocation();
for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
Parameter.clear();
// Lex identifier [:: identifier ...]
if (!CurTok.is(tok::identifier)) {
+ Result.EndLoc = CurTok.getEndLoc();
Diag(CurTok, diag::err_expected) << "identifier";
DiscardUntilEndOfDirective();
return Result;
@@ -3647,6 +3651,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
Parameter.append("::");
LexNonComment(CurTok);
if (!CurTok.is(tok::identifier)) {
+ Result.EndLoc = CurTok.getEndLoc();
Diag(CurTok, diag::err_expected) << "identifier";
DiscardUntilEndOfDirective();
return Result;
@@ -3670,25 +3675,19 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
return Result;
}
const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
- const bool ValueDoesNotFit =
- LimitResult.getBitWidth() > 64
- ? true
- : (LimitResult.isUnsigned() ||
- (LimitResult.isSigned() && LimitResult.isNegative()));
- if (ValueDoesNotFit) {
+ if (LimitResult.getBitWidth() > 64) {
Diag(CurTok, diag::warn_pp_expr_overflow);
- // just truncate and roll with that, I guess?
- Result.MaybeLimitParam =
- static_cast<size_t>(LimitResult.getRawData()[0]);
- } else {
- Result.MaybeLimitParam =
- static_cast<size_t>(LimitResult.getZExtValue());
}
+ size_t LimitValue = 0;
+ LimitValue = LimitResult.getLimitedValue();
+ Result.MaybeLimitParam = PPEmbedParameterLimit{
+ LimitValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
LexNonComment(CurTok);
} else if (Parameter == "clang::offset") {
// we have a limit parameter and its internals are processed using
// evaluation rules from #if - handle here
if (CurTok.isNot(tok::l_paren)) {
+ Result.EndLoc = CurTok.getEndLoc();
Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
DiscardUntilEndOfDirective();
return Result;
@@ -3697,18 +3696,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
DirectiveEvalResult OffsetEvalResult =
EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
if (!OffsetEvalResult.Value) {
+ Result.EndLoc = CurTok.getEndLoc();
return Result;
}
const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+ size_t OffsetValue;
if (OffsetResult.getBitWidth() > 64) {
Diag(CurTok, diag::warn_pp_expr_overflow);
- // just truncate and roll with that, I guess?
- Result.MaybeOffsetParam =
- static_cast<size_t>(OffsetResult.getRawData()[0]);
- } else {
- Result.MaybeOffsetParam =
- static_cast<size_t>(OffsetResult.getZExtValue());
}
+ OffsetValue = OffsetResult.getLimitedValue();
+ Result.MaybeOffsetParam = PPEmbedParameterOffset{
+ OffsetValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
LexNonComment(CurTok);
} else {
if (CurTok.is(tok::l_paren)) {
@@ -3764,6 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
return true;
};
if (!ParseArgToken()) {
+ Result.EndLoc = CurTok.getEndLoc();
return Result;
}
if (!CurTok.is(tok::r_paren)) {
@@ -3775,14 +3774,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
}
// "Token-soup" parameters
if (Parameter == "if_empty") {
- // TODO: integer list optimization
- Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+ Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+ std::move(ParameterTokens), ParameterStartTok.getLocation(),
+ CurTok.getLocation()};
} else if (Parameter == "prefix") {
- // TODO: integer list optimization
- Result.MaybePrefixParam = std::move(ParameterTokens);
+ Result.MaybePrefixParam = PPEmbedParameterPrefix{
+ std::move(ParameterTokens), ParameterStartTok.getLocation(),
+ CurTok.getLocation()};
} else if (Parameter == "suffix") {
- // TODO: integer list optimization
- Result.MaybeSuffixParam = std::move(ParameterTokens);
+ Result.MaybeSuffixParam = PPEmbedParameterSuffix{
+ std::move(ParameterTokens), ParameterStartTok.getLocation(),
+ CurTok.getLocation()};
} else {
++Result.UnrecognizedParams;
if (DiagnoseUnknown) {
@@ -3793,6 +3795,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
}
}
Result.Successful = true;
+ Result.EndLoc = CurTok.getEndLoc();
return Result;
}
@@ -3823,89 +3826,327 @@ inline constexpr const char *IntegerLiterals[] = {
"242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
"253", "254", "255"};
-void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
- LexEmbedParametersResult &Params,
- StringRef BinaryContents,
- const size_t TargetCharWidth) {
- (void)TargetCharWidth; // for later, when we support various sizes
- size_t TokenIndex = 0;
- const size_t InitListTokensSize = [&]() {
- if (BinaryContents.empty()) {
- if (Params.MaybeIfEmptyParam) {
- return Params.MaybeIfEmptyParam->size();
+// Estimates how many characters the textual expansion of an #embed directive
+// needs, so the caller can reserve its output string once.
+//
+// For empty BinaryContents only the if_empty tokens are counted. Otherwise the
+// prefix tokens, one `((TypeName)N),` element per byte and the suffix tokens
+// are counted. As a side effect TokSpellingBuffer is grown so that it can
+// hold the longest token that was counted.
+static size_t
+ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params,
+ StringRef TypeName, StringRef BinaryContents,
+ SmallVectorImpl<char> &TokSpellingBuffer) {
+ size_t ReserveSize = 0;
+ // Shared by if_empty, prefix and suffix so the three cannot drift apart.
+ auto AddTokens = [&](const auto &Tokens) {
+ for (const auto &Tok : Tokens) {
+ const size_t TokLen = Tok.getLength();
+ if (TokLen > TokSpellingBuffer.size()) {
+ TokSpellingBuffer.resize(TokLen);
+ }
+ ReserveSize += TokLen;
+ }
+ };
+ if (BinaryContents.empty()) {
+ if (Params.MaybeIfEmptyParam) {
+ AddTokens(Params.MaybeIfEmptyParam->Tokens);
+ }
+ return ReserveSize;
+ }
+ if (Params.MaybePrefixParam) {
+ AddTokens(Params.MaybePrefixParam->Tokens);
+ }
+ // Read each byte as unsigned: where plain char is signed, bytes >= 0x80
+ // would compare as negative and be counted as a single digit.
+ for (const unsigned char Byte : BinaryContents) {
+ ReserveSize += 3 + TypeName.size(); // ((type-name)
+ if (Byte > 99) {
+ ReserveSize += 3; // ###
+ } else if (Byte > 9) {
+ ReserveSize += 2; // ##
} else {
- return static_cast<size_t>(0);
+ ReserveSize += 1; // #
}
- } else {
- return static_cast<size_t>(
- (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) +
- (BinaryContents.size() * 2 - 1) +
- (Params.MaybeSuffixParam ? Params.MaybeSuffixParam->size() : 0));
+ ReserveSize += 2; // ),
}
- }();
- std::unique_ptr<Token[]> InitListTokens(new Token[InitListTokensSize]());
+ // The tokens that follow the data are the suffix, not the prefix again.
+ if (Params.MaybeSuffixParam) {
+ AddTokens(Params.MaybeSuffixParam->Tokens);
+ }
+ return ReserveSize;
+}
+// Expands an #embed directive textually. A look-alike source buffer is built
+// from the if_empty tokens (when BinaryContents is empty) or from the prefix
+// tokens, one `((unsigned char)N)` element per byte separated by commas, and
+// the suffix tokens; the buffer is then entered as a source file at HashLoc.
+void Preprocessor::HandleEmbedDirectiveNaive(
+ SourceLocation HashLoc, SourceLocation FilenameLoc,
+ const LexEmbedParametersResult &Params, StringRef BinaryContents,
+ const size_t TargetCharWidth) {
+ // Load up a new embed buffer for this file and set of parameters in
+ // particular.
+ EmbedBuffers.push_back("");
+ const size_t EmbedBufferNumber = EmbedBuffers.size();
+ // Materialize the name as an owning std::string right away: a Twine only
+ // refers to its operands, so it must not be kept in a local or returned
+ // from a lambda.
+ const std::string EmbedBufferName =
+ ("<built-in:embed:" + llvm::Twine(EmbedBufferNumber) + ">").str();
+ std::string &TargetEmbedBuffer = EmbedBuffers.back();
+ const size_t TotalSize = BinaryContents.size();
+ // In the future, this might change/improve.
+ const StringRef TypeName = "unsigned char";
+
+ SmallVector<char, 32> TokSpellingBuffer(32, 0);
+ const size_t ReserveSize = ComputeNaiveReserveSize(
+ Params, TypeName, BinaryContents, TokSpellingBuffer);
+ TargetEmbedBuffer.reserve(ReserveSize);
+
+ // Appends the spelling of every token, each followed by one space, so that
+ // neighbouring tokens cannot fuse into a different token when the buffer is
+ // lexed again (e.g. `int` `x` must not become `intx`).
+ auto AppendTokens = [&](const auto &Tokens) {
+ for (const auto &Tok : Tokens) {
+ StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+ TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+ TargetEmbedBuffer.push_back(' ');
+ }
+ };
+
+ // Generate the look-alike source file
if (BinaryContents.empty()) {
if (Params.MaybeIfEmptyParam) {
- std::copy(Params.MaybeIfEmptyParam->begin(),
- Params.MaybeIfEmptyParam->end(), InitListTokens.get());
- TokenIndex += Params.MaybeIfEmptyParam->size();
- assert(TokenIndex == InitListTokensSize);
- EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true,
- true);
+ AppendTokens(Params.MaybeIfEmptyParam->Tokens);
+ }
+ } else {
+ if (Params.MaybePrefixParam) {
+ AppendTokens(Params.MaybePrefixParam->Tokens);
+ }
+ for (size_t I = 0; I < TotalSize; ++I) {
+ // Index the literal table with the byte read as unsigned (0..255).
+ unsigned char ByteValue = BinaryContents[I];
+ StringRef ByteRepresentation = IntegerLiterals[ByteValue];
+ TargetEmbedBuffer.append(2, '(');
+ TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+ TargetEmbedBuffer.append(1, ')');
+ TargetEmbedBuffer.append(ByteRepresentation.data(),
+ ByteRepresentation.size());
+ TargetEmbedBuffer.append(1, ')');
+ // Elements are comma separated; no comma follows the last one.
+ bool AtEndOfContents = I == (TotalSize - 1);
+ if (!AtEndOfContents) {
+ TargetEmbedBuffer.append(1, ',');
+ }
+ }
+ if (Params.MaybeSuffixParam) {
+ AppendTokens(Params.MaybeSuffixParam->Tokens);
}
- return;
}
- // FIXME: this does not take the target's byte size into account;
- // will fail on many DSPs and embedded machines!
+ // Create faux-file and its ID, backed by a memory buffer.
+ std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+ llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+ assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+ FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+ assert(EmbedBufferFID.isValid() &&
+ "Could not create FileID for #embed directive?");
+ // Start parsing the look-alike source file for the embed directive and
+ // pretend everything is normal
+ // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+ EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
+}
+
+/// Checks whether a prefix/suffix token list is a plain, comma-separated run
+/// of integer literals and, if so, appends their values to \p Output.
+///
+/// A prefix must read `N, N, ..., N,` (starts with a literal, ends on a
+/// comma); a suffix must read `, N, ..., N` (starts with a comma, ends on a
+/// literal). An empty list is accepted. Each literal must fit both in
+/// \p TargetCharWidth bits and in a single byte.
+///
+/// \returns true if the whole list matched. On failure \p Output may already
+/// hold the values of the literals seen before the mismatch.
+static bool TokenListIsCharacterArray(Preprocessor &PP,
+                                      const size_t TargetCharWidth,
+                                      bool IsPrefix,
+                                      const SmallVectorImpl<Token> &Tokens,
+                                      llvm::SmallVectorImpl<char> &Output) {
+  if (Tokens.empty())
+    return true;
+
+  // Largest value representable in TargetCharWidth bits. Integer arithmetic
+  // only: a shift by 64 or more would be undefined, so saturate instead.
+  const uint64_t MaxValue = TargetCharWidth >= 64
+                                ? ~uint64_t(0)
+                                : (uint64_t(1) << TargetCharWidth) - 1;
+
+  // if it's a suffix, we are expecting a comma first
+  // if it's a prefix, we are expecting a numeric literal first
+  bool ExpectingNumericLiteral = IsPrefix;
+  for (const Token &Tok : Tokens) {
+    // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the
+    // literals, since the Spec technically decrees each element is of type
+    // `unsigned char` (unless we have a potential future extension for
+    // `clang::type(meow)` as an embed parameter
+    if (ExpectingNumericLiteral) {
+      if (Tok.isNot(tok::numeric_constant))
+        return false;
+      uint64_t Value = 0;
+      // Parse a copy so the caller's token is left untouched.
+      Token ParsingTok = Tok;
+      // numeric literal is a floating point literal or a UDL; too complex for
+      // us
+      if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false))
+        return false;
+      // number is too large
+      if (Value > MaxValue || Value > static_cast<uint64_t>(0xFF))
+        return false;
+      Output.push_back(static_cast<char>(Value));
+    } else if (Tok.isNot(tok::comma)) {
+      return false;
+    }
+    // Literals and commas must strictly alternate.
+    ExpectingNumericLiteral = !ExpectingNumericLiteral;
+  }
+
+  // After the last token the flag says what would come next: a literal is
+  // expected exactly when the list ended on a comma. A prefix has to end on a
+  // comma, a suffix has to end on a literal.
+  const bool EndedOnComma = ExpectingNumericLiteral;
+  return IsPrefix ? EndedOnComma : !EndedOnComma;
+}
+
+/// Base64-encodes the concatenation of \p Bytes0, \p Bytes1 and \p Bytes2 and
+/// appends the result, padded with '=' to a multiple of four characters, to
+/// \p OutputBuffer. The three inputs are read in place and never joined.
+static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
+                               StringRef Bytes2, std::string &OutputBuffer) {
+  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                              "abcdefghijklmnopqrstuvwxyz"
+                              "0123456789+/";
+  const size_t FirstEnd = Bytes0.size();
+  const size_t SecondEnd = FirstEnd + Bytes1.size();
+  const size_t InputSize = SecondEnd + Bytes2.size();
+  // Number of input bytes that form complete three-byte groups.
+  const size_t WholeGroupBytes = InputSize / 3 * 3;
+
+  // Grow the output once; everything below writes through OutPos.
+  size_t OutPos = OutputBuffer.size();
+  OutputBuffer.resize(OutPos + ((InputSize + 2) / 3) * 4);
+
+  // Reads byte Pos of the virtual concatenation Bytes0 + Bytes1 + Bytes2.
+  auto ByteAt = [&](size_t Pos) -> uint32_t {
+    if (Pos < FirstEnd)
+      return static_cast<unsigned char>(Bytes0[Pos]);
+    if (Pos < SecondEnd)
+      return static_cast<unsigned char>(Bytes1[Pos - FirstEnd]);
+    return static_cast<unsigned char>(Bytes2[Pos - SecondEnd]);
+  };
+
+  // Writes four characters: the top SextetCount sextets of the 24-bit Group,
+  // followed by '=' padding for the remaining slots.
+  auto EmitGroup = [&](uint32_t Group, unsigned SextetCount) {
+    for (unsigned Slot = 0; Slot < 4; ++Slot)
+      OutputBuffer[OutPos++] =
+          Slot < SextetCount ? Table[(Group >> (18 - 6 * Slot)) & 63] : '=';
+  };
+
+  size_t InPos = 0;
+  for (; InPos < WholeGroupBytes; InPos += 3)
+    EmitGroup(ByteAt(InPos) << 16 | ByteAt(InPos + 1) << 8 | ByteAt(InPos + 2),
+              4);
+
+  // One or two trailing bytes yield two or three sextets plus padding.
+  switch (InputSize - WholeGroupBytes) {
+  case 1:
+    EmitGroup(ByteAt(InPos) << 16, 2);
+    break;
+  case 2:
+    EmitGroup(ByteAt(InPos) << 16 | ByteAt(InPos + 1) << 8, 3);
+    break;
+  default:
+    break;
+  }
+}
+
+/// Handles `#embed` by synthesizing a one-expression source buffer of the form
+/// `__builtin_pp_embed(unsigned char,"<file>","<base64>")` and entering it as
+/// a source file, instead of expanding to one token per byte.
+///
+/// Falls back to HandleEmbedDirectiveNaive when the data is empty, or when a
+/// prefix/suffix parameter is rejected by TokenListIsCharacterArray and so
+/// cannot be folded into the binary payload.
+///
+/// \param HashLoc location of the directive's '#', used as the include
+/// location of the synthesized buffer.
+/// \param ResolvedFilename file name recorded as the builtin's second
+/// argument.
+/// \param BinaryContents the bytes to embed.
+void Preprocessor::HandleEmbedDirectiveBuiltin(
+ SourceLocation HashLoc, const Token &FilenameTok,
+ StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
+ const LexEmbedParametersResult &Params, StringRef BinaryContents,
+ const size_t TargetCharWidth) {
+ // if it's empty, just process it like a normal expanded token stream
+ if (BinaryContents.empty()) {
+ HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+ BinaryContents, TargetCharWidth);
+ return;
+ }
+ SmallVector<char, 2> BinaryPrefix{};
+ SmallVector<char, 2> BinarySuffix{};
 if (Params.MaybePrefixParam) {
- std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(),
- InitListTokens.get() + TokenIndex);
- TokenIndex += Params.MaybePrefixParam->size();
- }
- for (size_t I = 0; I < BinaryContents.size(); ++I) {
- unsigned char ByteValue = BinaryContents[I];
- StringRef ByteRepresentation = IntegerLiterals[ByteValue];
- const size_t InitListIndex = TokenIndex;
- Token &IntToken = InitListTokens[InitListIndex];
- IntToken.setKind(tok::numeric_constant);
- IntToken.setLiteralData(ByteRepresentation.data());
- IntToken.setLength(ByteRepresentation.size());
- IntToken.setLocation(FilenameLoc);
- ++TokenIndex;
- bool AtEndOfContents = I == (BinaryContents.size() - 1);
- if (!AtEndOfContents) {
- const size_t CommaInitListIndex = InitListIndex + 1;
- Token &CommaToken = InitListTokens[CommaInitListIndex];
- CommaToken.setKind(tok::comma);
- CommaToken.setLocation(FilenameLoc);
- ++TokenIndex;
+ // If we have a prefix, validate that it's a good fit for direct data
+ // embedding (and prepare to prepend it)
+ const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
+ if (!TokenListIsCharacterArray(*this, TargetCharWidth, true,
+ PrefixParam.Tokens, BinaryPrefix)) {
+ HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+ BinaryContents, TargetCharWidth);
+ return;
 }
 }
 if (Params.MaybeSuffixParam) {
- std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(),
- InitListTokens.get() + TokenIndex);
- TokenIndex += Params.MaybeSuffixParam->size();
+ // If we have a suffix, validate that it's a good fit for direct data
+ // embedding (and prepare to append it)
+ const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
+ if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
+ SuffixParam.Tokens, BinarySuffix)) {
+ HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+ BinaryContents, TargetCharWidth);
+ return;
+ }
 }
- assert(TokenIndex == InitListTokensSize);
- EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false);
-}
-void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
- LexEmbedParametersResult &Params,
- StringRef BinaryContents,
- const size_t TargetCharWidth) {
- // TODO: implement direct built-in support
- HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
- TargetCharWidth);
+ // Load up a new embed buffer for this file and set of parameters in
+ // particular.
+ EmbedBuffers.push_back("");
+ std::string &TargetEmbedBuffer = EmbedBuffers.back();
+ // Build the buffer name as an owning std::string. A Twine only references
+ // its operands, so storing one in a local or returning it from a lambda
+ // leaves it dangling; it is only safe as a temporary within one expression.
+ const std::string EmbedBufferName =
+ ("<built-in:embed:" + llvm::Twine(EmbedBuffers.size()) + ">").str();
+ // Escape backslashes and quotes so the name survives being re-lexed as a
+ // string literal (e.g. Windows-style paths).
+ const std::string EscapedFilename = Lexer::Stringify(ResolvedFilename);
+ StringRef TypeName = "unsigned char";
+ const size_t TotalSize =
+ BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size();
+ const size_t ReserveSize = // add up for necessary size:
+ 19 // __builtin_pp_embed(
+ + TypeName.size() // type-name
+ + 2 // ,"
+ + EscapedFilename.size() // file-name
+ + 3 // ","
+ + (((TotalSize + 2) / 3) * 4) // base64-string
+ + 2 // ")
+ ;
+ // Reserve appropriate size
+ TargetEmbedBuffer.reserve(ReserveSize);
+
+ // Generate the look-alike source file
+ TargetEmbedBuffer.append("__builtin_pp_embed(");
+ TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+ TargetEmbedBuffer.append(",\"");
+ TargetEmbedBuffer.append(EscapedFilename);
+ TargetEmbedBuffer.append("\",\"");
+ // include the prefix(...) and suffix(...) binary data in the total contents
+ TripleEncodeBase64(
+ StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents,
+ StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer);
+ TargetEmbedBuffer.append("\")");
+ // Create faux-file and its ID, backed by a memory buffer.
+ std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+ llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+ assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+ FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+ assert(EmbedBufferFID.isValid() &&
+ "Could not create FileID for #embed directive?");
+ // Start parsing the look-alike source file for the embed directive and
+ // pretend everything is normal
+ // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+ EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
 }
void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
const FileEntry *LookupFromFile) {
if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
- auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_embed
- : diag::warn_cxx26_pp_embed);
+ auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+ : diag::warn_c23_pp_embed);
Diag(EmbedTok, EitherDiag);
}
@@ -3952,18 +4193,16 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
return;
}
- Diag(FilenameTok, diag::err_pp_file_not_found)
- << Filename;
+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
return;
}
std::optional<int64_t> MaybeSignedLimit{};
if (Params.MaybeLimitParam) {
- if (static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam) {
- MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
- }
+ MaybeSignedLimit = static_cast<int64_t>(Params.MaybeLimitParam->Limit);
}
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile = getFileManager().getBufferForFile(
- *MaybeFileRef, false, false, MaybeSignedLimit);
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
+ getFileManager().getBufferForFile(*MaybeFileRef, false, false,
+ MaybeSignedLimit);
if (!MaybeFile) {
// could not find file
Diag(FilenameTok, diag::err_cannot_open_file)
@@ -3973,7 +4212,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
StringRef BinaryContents = MaybeFile.get()->getBuffer();
if (Params.MaybeOffsetParam) {
// offsets all the way to the end of the file make for an empty file.
- const size_t OffsetParam = *Params.MaybeOffsetParam;
+ const size_t &OffsetParam = Params.MaybeOffsetParam->Offset;
BinaryContents = BinaryContents.substr(OffsetParam);
}
const size_t TargetCharWidth = getTargetInfo().getCharWidth();
@@ -4009,11 +4248,12 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
RelativePath);
}
if (PPOpts->NoBuiltinPPEmbed) {
- HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+ HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents,
TargetCharWidth);
} else {
// emit a token directly, handle it internally.
- HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+ HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath,
+ RelativePath, Params, BinaryContents,
TargetCharWidth);
}
}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 6e0163ccc89b7fb..7f6c964b0d68a3b 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1270,8 +1270,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
// pedwarn for not being on C23
if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
- auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed
- : diag::warn_cxx26_pp_has_embed);
+ auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+ : diag::warn_c23_pp_has_embed);
Diag(Tok, EitherDiag);
}
@@ -1321,7 +1321,8 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
SourceLocation FilenameLoc = Tok.getLocation();
Token FilenameTok = Tok;
- Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
+ Preprocessor::LexEmbedParametersResult Params =
+ this->LexEmbedParameters(Tok, true, false);
if (!Params.Successful) {
if (Tok.isNot(tok::eod))
this->DiscardUntilEndOfDirective();
@@ -1339,7 +1340,6 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
return VALUE__STDC_EMBED_NOT_FOUND__;
}
-
SmallString<128> FilenameBuffer;
SmallString<256> RelativePath;
StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
@@ -1351,11 +1351,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
assert(!Filename.empty());
const FileEntry *LookupFromFile =
this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
- : nullptr;
+ : nullptr;
OptionalFileEntryRef MaybeFileEntry =
this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
- LookupFromFile, nullptr,
- &RelativePath);
+ LookupFromFile, nullptr, &RelativePath);
if (Callbacks) {
Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
}
@@ -1363,11 +1362,15 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
return VALUE__STDC_EMBED_NOT_FOUND__;
}
size_t FileSize = MaybeFileEntry->getSize();
- if (FileSize == 0 ||
- (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) {
+ if (Params.MaybeLimitParam) {
+ if (FileSize > Params.MaybeLimitParam->Limit) {
+ FileSize = Params.MaybeLimitParam->Limit;
+ }
+ }
+ if (FileSize == 0) {
return VALUE__STDC_EMBED_EMPTY__;
}
- if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) {
+ if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
return VALUE__STDC_EMBED_EMPTY__;
}
return VALUE__STDC_EMBED_FOUND__;
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ede4c51487ffbe7..10eb6d268b37b1d 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1411,7 +1411,8 @@ bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
return true;
}
-bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
+bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+ bool WithLex) {
assert(Tok.is(tok::numeric_constant));
SmallString<8> IntegerBuffer;
bool NumberInvalid = false;
@@ -1426,7 +1427,8 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
llvm::APInt APVal(64, 0);
if (Literal.GetIntegerValue(APVal))
return false;
- Lex(Tok);
+ if (WithLex)
+ Lex(Tok);
Value = APVal.getLimitedValue();
return true;
}
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 9dbfc1c8c5e9ffe..ef3ae580a43aeb9 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -32,6 +32,7 @@
#include "clang/Sema/Scope.h"
#include "clang/Sema/TypoCorrection.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Base64.h"
#include <optional>
using namespace clang;
@@ -741,6 +742,8 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
};
}
+// clang-format off
+
/// Parse a cast-expression, or, if \pisUnaryExpression is true, parse
/// a unary-expression.
///
@@ -805,6 +808,7 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
/// [MS] '__builtin_FUNCSIG' '(' ')'
/// [GNU] '__builtin_LINE' '(' ')'
/// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
/// [GNU] '__builtin_source_location' '(' ')'
/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
/// [GNU] '__null'
@@ -924,6 +928,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
/// '__is_rvalue_expr'
/// \endverbatim
///
+
+// clang-format on
+
ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
bool isAddressOfOperand,
bool &NotCastExpr,
@@ -1345,6 +1352,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
case tok::kw___builtin_FUNCSIG:
case tok::kw___builtin_LINE:
case tok::kw___builtin_source_location:
+ case tok::kw___builtin_pp_embed:
if (NotPrimaryExpression)
*NotPrimaryExpression = true;
// This parses the complete suffix; we can return early.
@@ -2145,6 +2153,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
} else {
Expr *Fn = LHS.get();
SourceLocation RParLoc = Tok.getLocation();
+ Actions.ModifyCallExprArguments(Fn, Loc, ArgExprs, RParLoc);
LHS = Actions.ActOnCallExpr(getCurScope(), Fn, Loc, ArgExprs, RParLoc,
ExecConfig);
if (LHS.isInvalid()) {
@@ -2560,6 +2569,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
return Operand;
}
+// clang-format off
+
/// ParseBuiltinPrimaryExpression
///
/// \verbatim
@@ -2575,6 +2586,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
/// [MS] '__builtin_FUNCSIG' '(' ')'
/// [GNU] '__builtin_LINE' '(' ')'
/// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
/// [GNU] '__builtin_source_location' '(' ')'
/// [OCL] '__builtin_astype' '(' assignment-expression ',' type-name ')'
///
@@ -2583,6 +2595,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
/// [GNU] offsetof-member-designator '.' identifier
/// [GNU] offsetof-member-designator '[' expression ']'
/// \endverbatim
+
+// clang-format on
ExprResult Parser::ParseBuiltinPrimaryExpression() {
ExprResult Res;
const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo();
@@ -2841,6 +2855,96 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen());
break;
}
+ case tok::kw___builtin_pp_embed: {
+   // '__builtin_pp_embed' '(' type-name ',' string-literal ','
+   //                          string-literal ')'
+   // Structural errors (missing comma, missing string literal) bail out
+   // immediately; semantic errors are collected in Res so that every problem
+   // with the arguments is reported before giving up.
+
+   // Argument 1: the element type.
+   SourceRange DataTyExprSourceRange{};
+   TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+
+   if (ExpectAndConsume(tok::comma)) {
+     SkipUntil(tok::r_paren, StopAtSemi);
+     return ExprError();
+   }
+
+   // Argument 2: the file name. ParseStringLiteralExpression() expects to be
+   // positioned on a string literal, so check before calling it.
+   if (!isTokenStringLiteral()) {
+     Diag(Tok, diag::err_expected_string_literal)
+         << /*Source='in...'*/ 0 << "'__builtin_pp_embed'";
+     SkipUntil(tok::r_paren, StopAtSemi);
+     return ExprError();
+   }
+   ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+
+   if (ExpectAndConsume(tok::comma)) {
+     SkipUntil(tok::r_paren, StopAtSemi);
+     return ExprError();
+   }
+
+   // Argument 3: the base64-encoded payload.
+   if (!isTokenStringLiteral()) {
+     Diag(Tok, diag::err_expected_string_literal)
+         << /*Source='in...'*/ 0 << "'__builtin_pp_embed'";
+     SkipUntil(tok::r_paren, StopAtSemi);
+     return ExprError();
+   }
+   ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+
+   if (Tok.isNot(tok::r_paren)) {
+     Diag(Tok, diag::err_expected) << tok::r_paren;
+     Res = ExprError();
+   }
+
+   // Validate the element type. Its width is only queried once the type is
+   // known to be one of the supported character types.
+   const ASTContext &Context = Actions.getASTContext();
+   QualType DataTy = Context.UnsignedCharTy;
+   size_t TargetWidth = Context.getTypeSize(DataTy);
+   if (DataTyExpr.isInvalid()) {
+     Res = ExprError();
+   } else {
+     QualType ParsedTy = DataTyExpr.get().get().getCanonicalType();
+     if (ParsedTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+         ParsedTy.getUnqualifiedType() != Context.CharTy) {
+       // TODO: check if is exactly the same as unsigned char
+       Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+           << "only 'char' and 'unsigned char' are supported";
+       Res = ExprError();
+     } else {
+       DataTy = ParsedTy;
+       TargetWidth = Context.getTypeSize(DataTy);
+       if ((TargetWidth % CHAR_BIT) != 0) {
+         Diag(DataTyExprSourceRange.getBegin(),
+              diag::err_builtin_pp_embed_invalid_argument)
+             << "width of element type is not a multiple of host platform's "
+                "CHAR_BIT!";
+         Res = ExprError();
+       }
+     }
+   }
+
+   StringLiteral *FilenameLiteral = nullptr;
+   if (FilenameArgExpr.isInvalid()) {
+     Res = ExprError();
+   } else {
+     FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+   }
+
+   // Decode the payload. Every failure below marks Res invalid so that no
+   // PPEmbedExpr is built from missing or partially decoded data.
+   std::vector<char> BinaryData{};
+   if (Base64ArgExpr.isInvalid()) {
+     Res = ExprError();
+   } else {
+     StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+     if (Base64Str->getKind() != StringLiteral::Ordinary) {
+       Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+           << 0
+           << "'__builtin_pp_embed' with valid base64 encoding that is an "
+              "ordinary \"...\" string";
+       Res = ExprError();
+     } else {
+       bool DecodeFailed = false;
+       const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+         DecodeFailed = true;
+         Diag(Base64Str->getExprLoc(),
+              diag::err_builtin_pp_embed_invalid_argument)
+             << "expected a valid base64 encoded string";
+       };
+       llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+       llvm::handleAllErrors(std::move(Err), OnDecodeError);
+       if (DecodeFailed) {
+         Res = ExprError();
+       } else if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+         Diag(DataTyExprSourceRange.getBegin(),
+              diag::err_builtin_pp_embed_invalid_argument)
+             << "size of data does not split evently into the number of bytes "
+                "requested";
+         Res = ExprError();
+       }
+     }
+   }
+
+   if (!Res.isInvalid()) {
+     Res = Actions.ActOnPPEmbedExpr(
+         StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(),
+         FilenameLiteral, DataTy, std::move(BinaryData));
+   }
+   break;
+ }
}
if (Res.isInvalid())
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index f556d0e6d4f8b6e..8364519861fe4f3 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1671,6 +1671,8 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
// arguments.
} while (TryConsumeToken(tok::comma));
+ Actions.ModifyTemplateArguments(Template, TemplateArgs);
+
return false;
}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f249d41bc9bfbb6..44d8ddba080d82e 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13336,6 +13336,54 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
return;
}
+ // Adjust the init expression for PPEmbedExpr as early as possible
+ // here.
+ bool AlreadyAdjustedPPEmbedExpr = false;
+ if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init); ILExpr) {
+ QualType VDeclTy = VDecl->getType();
+ ArrayRef<Expr *> Inits = ILExpr->inits();
+ if (CheckExprListForPPEmbedExpr(Inits, VDeclTy) == PPEmbedExpr::FoundOne) {
+ PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Inits[0]);
+ ILExpr->setInit(0, PPEmbed->getDataStringLiteral());
+ AlreadyAdjustedPPEmbedExpr = true;
+ }
+ }
+
+ if (!AlreadyAdjustedPPEmbedExpr) {
+ // If there is a PPEmbedExpr as a single initializer without braces,
+ // make sure it only produces a single element (and then expand said
+ // element).
+ if (PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Init);
+ PPEmbed) {
+ if (PPEmbed->getDataElementCount(Context) == 1) {
+ // Expand the list in-place immediately, let the natural work take hold
+ Init = ExpandSinglePPEmbedExpr(PPEmbed);
+ } else {
+ // `__builtin_pp_embed( ... )` only produces 2 or more values.
+ Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
+ << "'__builtin_pp_embed'";
+ RealDecl->setInvalidDecl();
+ return;
+ }
+ }
+
+ // Legitimately, in all other cases, COMPLETELY nuke the PPEmbedExpr
+ // and turn it into a list of integers where applicable.
+ if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init);
+ ILExpr) {
+ ArrayRef<Expr *> Inits = ILExpr->inits();
+ SmallVector<Expr *, 4> OutputExprList{};
+ if (ExpandPPEmbedExprInExprList(Inits, OutputExprList, false) ==
+ PPEmbedExpr::Expanded) {
+ ILExpr->resizeInits(Context, OutputExprList.size());
+ for (size_t I = 0; I < OutputExprList.size(); ++I) {
+ auto &InitExpr = OutputExprList[I];
+ ILExpr->setInit(I, InitExpr);
+ }
+ }
+ }
+ }
+
// WebAssembly tables can't be used to initialise a variable.
if (Init && !Init->getType().isNull() &&
Init->getType()->isWebAssemblyTableType()) {
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index f9c010b1a002488..37321d2417a7d2e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17022,7 +17022,8 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
SourceLocation RParenLoc) {
if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
return nullptr;
-
+ if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert))
+ return nullptr;
return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
AssertMessageExpr, RParenLoc, false);
}
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 75730ea888afb41..ebeed7f4d2b485e 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1412,6 +1412,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Expr::SizeOfPackExprClass:
case Expr::StringLiteralClass:
case Expr::SourceLocExprClass:
+ case Expr::PPEmbedExprClass:
case Expr::ConceptSpecializationExprClass:
case Expr::RequiresExprClass:
// These expressions can never throw.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cf45fc388083ce6..c10e6501daef6e2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7110,6 +7110,13 @@ static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S,
<< FixItHint::CreateInsertion(DRE->getLocation(), "std::");
}
+/// Rewrites a call's argument list before the call expression is built by
+/// handing it to ExpandPPEmbedExprInExprList. Fn and the two parenthesis
+/// locations are currently unused.
+void Sema::ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                                   SmallVectorImpl<Expr *> &ArgExprs,
+                                   SourceLocation RParenLoc) {
+  // Whether anything was expanded is irrelevant here; drop the result.
+  (void)ExpandPPEmbedExprInExprList(ArgExprs);
+}
+
ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
MultiExprArg ArgExprs, SourceLocation RParenLoc,
Expr *ExecConfig) {
@@ -7947,8 +7954,17 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
}
}
- InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
- RBraceLoc);
+ InitListExpr *E = nullptr;
+ if (InitArgList.size() > 1 &&
+ CheckExprListForPPEmbedExpr(InitArgList, std::nullopt) !=
+ PPEmbedExpr::NotFound) {
+ SmallVector<Expr *, 4> OutputExprList;
+ ExpandPPEmbedExprInExprList(InitArgList, OutputExprList);
+ E = new (Context)
+ InitListExpr(Context, LBraceLoc, OutputExprList, RBraceLoc);
+ } else {
+ E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
+ }
E->setType(Context.VoidTy); // FIXME: just a place holder for now.
return E;
}
@@ -17570,6 +17586,225 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
}
+/// Builds a PPEmbedExpr for a parsed `__builtin_pp_embed(...)`.
+///
+/// The decoded bytes are stored in an ordinary StringLiteral whose type is a
+/// constant array of ElementTy with BinaryData.size() entries, located at
+/// Base64DataLocation; that literal and Filename are then attached to the new
+/// expression together with the current declaration context.
+ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+                                  SourceLocation Base64DataLocation,
+                                  SourceLocation RPLoc, StringLiteral *Filename,
+                                  QualType ElementTy,
+                                  std::vector<char> BinaryData) {
+  const StringRef Payload(BinaryData.data(), BinaryData.size());
+
+  // Array extent, expressed at the bit-width of the target's size type.
+  const unsigned SizeTypeBits = Context.getTypeSize(Context.getSizeType());
+  const uint64_t ExtentWords[] = {Payload.size()};
+  const llvm::APInt Extent(SizeTypeBits, ArrayRef<uint64_t>(ExtentWords));
+  llvm::APSInt ArraySize(Extent);
+
+  QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
+                                                  ArrayType::Normal, 0);
+  StringLiteral *BinaryDataLiteral =
+      StringLiteral::Create(Context, Payload, StringLiteral::Ordinary, false,
+                            ArrayTy, Base64DataLocation);
+  return new (Context) PPEmbedExpr(Context, ElementTy, Filename,
+                                   BinaryDataLiteral, BuiltinLoc, RPLoc,
+                                   CurContext);
+}
+
+/// Converts a PPEmbedExpr that holds exactly one data element into an
+/// IntegerLiteral of the embed's element type, located at the start of the
+/// embed's data string literal.
+///
+/// The element's bytes are assembled least-significant byte first into 64-bit
+/// words, which are then handed to APInt.
+IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) {
+  assert(PPEmbed->getDataElementCount(Context) == 1 &&
+         "Data should only contain a single element");
+  StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+  QualType ElementTy = PPEmbed->getType();
+  const size_t TargetWidth = Context.getTypeSize(ElementTy);
+  assert(TargetWidth >= CHAR_BIT && (TargetWidth % CHAR_BIT) == 0 &&
+         "Element width must be a whole, non-zero number of bytes");
+  // Bytes per element is width-in-bits divided by bits-per-byte (not the
+  // other way round, which yields 0 for anything wider than a byte).
+  const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+  StringRef Data = DataLiteral->getBytes();
+  assert(Data.size() >= BytesPerElement &&
+         "Data is too short to hold one element");
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+    // Position of this byte inside the current 64-bit word; shifting by the
+    // absolute index would exceed the word width past the eighth byte.
+    const size_t ByteInWord = ValIndex % sizeof(uint64_t);
+    if (ByteInWord == 0) {
+      ByteVals.push_back(0);
+    }
+    const unsigned char DataByte = Data[ValIndex];
+    ByteVals.back() |=
+        (static_cast<uint64_t>(DataByte) << (ByteInWord * CHAR_BIT));
+  }
+  ArrayRef<uint64_t> ByteValsRef(ByteVals);
+  return IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                ElementTy, DataLiteral->getBeginLoc());
+}
+
+/// Classifies how an expression list relates to PPEmbedExpr without changing
+/// it.
+///
+/// \returns
+///  - NotFound: the list is empty or contains no PPEmbedExpr;
+///  - FoundOne: the list is exactly one PPEmbedExpr and either no target type
+///    was supplied, or the target is an array whose element type is compatible
+///    with the embed's type (or both are character types);
+///  - Expanded: a PPEmbedExpr is present in any other arrangement.
+PPEmbedExpr::Action
+Sema::CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+                                  std::optional<QualType> MaybeInitType) {
+  if (ExprList.empty())
+    return PPEmbedExpr::NotFound;
+
+  // Only a list of exactly one entry can qualify as "the sole embed".
+  PPEmbedExpr *Sole = nullptr;
+  if (ExprList.size() == 1)
+    Sole = dyn_cast_if_present<PPEmbedExpr>(ExprList[0]);
+
+  if (Sole) {
+    // Without type information, leave it; it may be adjusted later.
+    if (!MaybeInitType)
+      return PPEmbedExpr::FoundOne;
+
+    // With type information the embed is kept only when it can initialize
+    // the array directly; otherwise fall through to the generic scan below.
+    QualType &InitType = *MaybeInitType;
+    if (InitType->isArrayType()) {
+      QualType TargetElemTy =
+          InitType->getAsArrayTypeUnsafe()->getElementType();
+      QualType EmbedElemTy = Sole->getType();
+      if (Context.typesAreCompatible(TargetElemTy, EmbedElemTy) ||
+          (TargetElemTy->isCharType() && EmbedElemTy->isCharType()))
+        return PPEmbedExpr::FoundOne;
+    }
+  }
+
+  // Any embed at all means the list has to be expanded.
+  for (const Expr *Candidate : ExprList) {
+    if (isa<PPEmbedExpr>(Candidate))
+      return PPEmbedExpr::Expanded;
+  }
+  return PPEmbedExpr::NotFound;
+}
+
+/// Replaces, in place, every PPEmbedExpr in ExprList with one IntegerLiteral
+/// per data element of that embed; an embed without data is simply removed.
+///
+/// \returns PPEmbedExpr::Expanded if at least one PPEmbedExpr was found (so
+/// the list was rewritten), PPEmbedExpr::NotFound otherwise.
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &ExprList) {
+  PPEmbedExpr::Action Action = PPEmbedExpr::NotFound;
+  SmallVector<uint64_t, 4> ByteVals{};
+  SmallVector<Expr *, 16> Replacements{};
+  for (size_t I = 0; I < ExprList.size();) {
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(ExprList[I]);
+    if (!PPEmbed) {
+      ++I;
+      continue;
+    }
+    // From here on the list is modified, even when the embed is empty.
+    Action = PPEmbedExpr::Expanded;
+    auto ExprListIt = ExprList.erase(ExprList.begin() + I);
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0) {
+      // No ++I, we are already pointing to the next unvisited element.
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    assert(TargetWidth >= CHAR_BIT && (TargetWidth % CHAR_BIT) == 0 &&
+           "Element width must be a whole, non-zero number of bytes");
+    // Bytes per element is width-in-bits divided by bits-per-byte; the
+    // inverse is 0 for wide elements and would stall the loop below.
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+
+    // Build all literals first and splice them in with a single range
+    // insert, so the tail of ExprList is shifted once rather than once per
+    // embedded element.
+    Replacements.clear();
+    Replacements.reserve(ExpectedDataElements);
+    for (size_t ByteIndex = 0; ByteIndex + BytesPerElement <= Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        // Shift by the byte's position inside the current 64-bit word.
+        const size_t ByteInWord = ValIndex % sizeof(uint64_t);
+        if (ByteInWord == 0) {
+          ByteVals.push_back(0);
+        }
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        ByteVals.back() |=
+            (static_cast<uint64_t>(DataByte) << (ByteInWord * CHAR_BIT));
+      }
+      ArrayRef<uint64_t> ByteValsRef(ByteVals);
+      Replacements.push_back(
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                 ElementTy, DataLiteral->getBeginLoc()));
+    }
+    assert(Replacements.size() == ExpectedDataElements);
+    ExprList.insert(ExprListIt, Replacements.begin(), Replacements.end());
+    // Continue with the element that followed the embed.
+    I += Replacements.size();
+  }
+  return Action;
+}
+
+/// Copies ExprList into OutputExprList, replacing every PPEmbedExpr with one
+/// IntegerLiteral per data element of that embed.
+///
+/// \param ClearOutputFirst when true, OutputExprList is emptied before
+/// anything else happens.
+/// \returns PPEmbedExpr::NotFound, leaving OutputExprList without any
+/// appended entries, when ExprList contains no PPEmbedExpr;
+/// PPEmbedExpr::Expanded otherwise.
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+                                  SmallVectorImpl<Expr *> &OutputExprList,
+                                  bool ClearOutputFirst) {
+  if (ClearOutputFirst) {
+    OutputExprList.clear();
+  }
+  // Lower bound for the final size: one slot per input entry, plus the data
+  // elements of every embed (each embed's own slot is reused).
+  size_t ExpectedResize = OutputExprList.size() + ExprList.size();
+  bool FoundPPEmbedExpr = false;
+  for (const Expr *SomeExpr : ExprList) {
+    if (const auto *PPEmbed = dyn_cast<PPEmbedExpr>(SomeExpr)) {
+      FoundPPEmbedExpr = true;
+      ExpectedResize += PPEmbed->getDataElementCount(Context);
+    }
+  }
+  if (!FoundPPEmbedExpr) {
+    return PPEmbedExpr::NotFound;
+  }
+  SmallVector<uint64_t, 4> ByteVals{};
+  OutputExprList.reserve(ExpectedResize);
+  for (size_t I = 0; I < ExprList.size(); ++I) {
+    Expr *OriginalExpr = ExprList[I];
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(OriginalExpr);
+    if (!PPEmbed) {
+      OutputExprList.push_back(OriginalExpr);
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    assert(TargetWidth >= CHAR_BIT && (TargetWidth % CHAR_BIT) == 0 &&
+           "Element width must be a whole, non-zero number of bytes");
+    // Bytes per element is width-in-bits divided by bits-per-byte; the
+    // inverse is 0 for wide elements and would stall the loop below.
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    for (size_t ByteIndex = 0; ByteIndex + BytesPerElement <= Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        // Shift by the byte's position inside the current 64-bit word.
+        const size_t ByteInWord = ValIndex % sizeof(uint64_t);
+        if (ByteInWord == 0) {
+          ByteVals.push_back(0);
+        }
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        ByteVals.back() |=
+            (static_cast<uint64_t>(DataByte) << (ByteInWord * CHAR_BIT));
+      }
+      ArrayRef<uint64_t> ByteValsRef(ByteVals);
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      OutputExprList.push_back(IntLit);
+    }
+  }
+  return PPEmbedExpr::Expanded;
+}
+
+/// Returns the source spelling of the construct identified by Context, for
+/// use in diagnostics about a misplaced preprocessor embed. Values other than
+/// the two static-assert kinds are not expected here.
+StringRef Sema::GetLocationName(PPEmbedExprContext Context) const {
+  if (Context == PPEEC__StaticAssert)
+    return "_Static_assert";
+  if (Context == PPEEC_StaticAssert)
+    return "static_assert";
+  llvm_unreachable("unhandled PPEmbedExprContext value");
+}
+
+/// Checks whether E may appear in the context described by PPEmbedContext.
+///
+/// \returns true when E is not a PPEmbedExpr, or when SingleAllowed is set and
+/// the embed holds exactly one element (E is then replaced by the equivalent
+/// integer literal). Otherwise emits
+/// diag::err_builtin_pp_embed_invalid_location at ContextLocation and returns
+/// false.
+///
+/// NOTE(review): true means "acceptable" here, yet the static_assert call site
+/// in this patch bails out (`return nullptr`) when this returns true, in the
+/// style of DiagnoseUnexpandedParameterPack. One of the two looks inverted;
+/// confirm the intended convention against all callers.
+bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+                               PPEmbedExprContext PPEmbedContext,
+                               bool SingleAllowed) {
+  auto *Embed = dyn_cast_if_present<PPEmbedExpr>(E);
+  if (!Embed)
+    return true;
+
+  if (SingleAllowed) {
+    // A one-element embed is just a scalar: substitute the literal.
+    if (Embed->getDataElementCount(Context) == 1) {
+      E = ExpandSinglePPEmbedExpr(Embed);
+      return true;
+    }
+  }
+
+  StringRef LocationName = GetLocationName(PPEmbedContext);
+  StringRef DiagnosticMessage;
+  if (SingleAllowed)
+    DiagnosticMessage = "cannot use a preprocessor embed that expands to "
+                        "nothing or expands to more than one item in ";
+  else
+    DiagnosticMessage = "cannot use a preprocessor embed in ";
+  Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
+      << DiagnosticMessage << 1 << LocationName;
+  return false;
+}
+
bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,
bool Diagnose) {
if (!getLangOpts().ObjC)
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index ff370dd1e080b2b..234e678c71b1401 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1623,6 +1623,62 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
return Param;
}
+/// Expand every PPEmbedExpr non-type argument in \p TemplateArgs in place into
+/// one IntegerLiteral argument per embedded data element. An embed with zero
+/// elements is simply removed; all other arguments are left untouched.
+/// \p Template is not consulted.
+void Sema::ModifyTemplateArguments(
+    const TemplateTy &Template,
+    SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs) {
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t I = 0; I < TemplateArgs.size();) {
+    ParsedTemplateArgument &OriginalArg = TemplateArgs[I];
+    PPEmbedExpr *PPEmbed =
+        OriginalArg.getKind() == ParsedTemplateArgument::NonType
+            ? dyn_cast<PPEmbedExpr>(OriginalArg.getAsExpr())
+            : nullptr;
+    if (!PPEmbed) {
+      ++I;
+      continue;
+    }
+    // Copy the location out first: erase() and insert() below shift (and may
+    // reallocate) the elements, so OriginalArg must not be read afterwards.
+    const SourceLocation ArgLoc = OriginalArg.getLocation();
+    auto TemplateArgListIt = TemplateArgs.erase(&OriginalArg);
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0) {
+      // No ++I; already pointing at the right element!
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Bytes per element is the width in bits divided by the bits per byte,
+    // not the inverse (which is 0 for any element wider than one byte and
+    // would keep the loop below from ever advancing).
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    assert(BytesPerElement != 0 && "embed element narrower than one byte");
+    StringRef Data = DataLiteral->getBytes();
+    size_t Insertions = 0;
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      // Assemble one element from its bytes, least significant byte first,
+      // spilling into a fresh 64-bit word every sizeof(uint64_t) bytes.
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        const size_t WordByte = ValIndex % sizeof(uint64_t);
+        if (WordByte == 0)
+          ByteVals.push_back(0);
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        // Shift by the position inside the current word only; shifting a
+        // uint64_t by 64 or more bits is undefined.
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << (WordByte * CHAR_BIT);
+      }
+      IntegerLiteral *IntLit = IntegerLiteral::Create(
+          Context, llvm::APInt(TargetWidth, ArrayRef<uint64_t>(ByteVals)),
+          ElementTy, DataLiteral->getBeginLoc());
+      TemplateArgListIt = TemplateArgs.insert(
+          TemplateArgListIt,
+          ParsedTemplateArgument(ParsedTemplateArgument::NonType, IntLit,
+                                 ArgLoc));
+      // Step past the element just inserted so the next one lands after it
+      // and the embedded data keeps its order.
+      ++TemplateArgListIt;
+      ++Insertions;
+    }
+    assert(Insertions == ExpectedDataElements);
+    I += Insertions;
+  }
+}
+
/// ActOnTemplateTemplateParameter - Called when a C++ template template
/// parameter (e.g. T in template <template \<typename> class T> class array)
/// has been parsed. S is the current scope.
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8fafdd4f5caa1ed..ed5a03393d4adb5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12127,6 +12127,12 @@ ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
getSema().CurContext);
}
+/// Placeholder transform for PPEmbedExpr: the node is handed back unchanged
+/// and nothing inside it is rebuilt.
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformPPEmbedExpr(PPEmbedExpr *E) {
+ // TODO: fully implement for tree transformations
+ return E;
+}
+
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 1bdc3fa3bea455a..9acf786cf3cc463 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1297,6 +1297,15 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) {
static_cast<SourceLocExpr::IdentKind>(Record.readInt());
}
+/// Deserialize a PPEmbedExpr. After the base Expr fields, the record is read
+/// in this order: the parent DeclContext, two source locations (stored into
+/// BuiltinLoc and RParenLoc), then the filename and the binary-data
+/// StringLiterals as sub-statements.
+void ASTStmtReader::VisitPPEmbedExpr(PPEmbedExpr *E) {
+ VisitExpr(E);
+ E->ParentContext = readDeclAs<DeclContext>();
+ E->BuiltinLoc = readSourceLocation();
+ E->RParenLoc = readSourceLocation();
+ E->Filename = cast<StringLiteral>(Record.readSubStmt());
+ E->BinaryData = cast<StringLiteral>(Record.readSubStmt());
+}
+
void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) {
VisitExpr(E);
E->setAmpAmpLoc(readSourceLocation());
@@ -3121,6 +3130,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = new (Context) SourceLocExpr(Empty);
break;
+ case EXPR_BUILTIN_PP_EMBED:
+ S = new (Context) PPEmbedExpr(Empty);
+ break;
+
case EXPR_ADDR_LABEL:
S = new (Context) AddrLabelExpr(Empty);
break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 125ca17c0c1212e..482daabe30f8349 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1169,6 +1169,16 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) {
Code = serialization::EXPR_SOURCE_LOC;
}
+/// Serialize a PPEmbedExpr. After the base Expr fields, the record holds, in
+/// this order: the parent context as a Decl reference, the begin and end
+/// locations, then the filename and the data StringLiterals. The record is
+/// tagged EXPR_BUILTIN_PP_EMBED.
+void ASTStmtWriter::VisitPPEmbedExpr(PPEmbedExpr *E) {
+ VisitExpr(E);
+ Record.AddDeclRef(cast_or_null<Decl>(E->getParentContext()));
+ // NOTE(review): ASTStmtReader::VisitPPEmbedExpr stores these two locations
+ // into BuiltinLoc and RParenLoc; presumably getBeginLoc()/getEndLoc() return
+ // exactly those fields -- confirm against the PPEmbedExpr definition.
+ Record.AddSourceLocation(E->getBeginLoc());
+ Record.AddSourceLocation(E->getEndLoc());
+ Record.AddStmt(E->getFilenameStringLiteral());
+ Record.AddStmt(E->getDataStringLiteral());
+ Code = serialization::EXPR_BUILTIN_PP_EMBED;
+}
+
void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
VisitExpr(E);
Record.AddSourceLocation(E->getAmpAmpLoc());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 451ee91b94533d5..70347fb9ffb2ca7 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2411,6 +2411,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
}
+
+ case Stmt::PPEmbedExprClass:
+ llvm_unreachable("Support for PPEmbedExpr is not implemented.");
+ break;
}
}
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 000000000000000..1639fb7af7f07b0
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data) == 274, "");
+_Static_assert(' ' == data[0], "");
+_Static_assert('_' == data[11], "");
+_Static_assert('\n' == data[273], "");
+_Static_assert(sizeof(data2) == 275, "");
+_Static_assert(' ' == data2[0], "");
+_Static_assert('_' == data2[11], "");
+_Static_assert('\n' == data2[273], "");
+_Static_assert('\0' == data2[274], "");
+_Static_assert(sizeof(data3) == 275, "");
+_Static_assert(' ' == data3[0], "");
+_Static_assert('_' == data3[11], "");
+_Static_assert('\n' == data3[273], "");
+_Static_assert('\0' == data3[274], "");
+_Static_assert(sizeof(data4) == 275, "");
+_Static_assert(' ' == data4[0], "");
+_Static_assert('_' == data4[11], "");
+_Static_assert('\n' == data4[273], "");
+_Static_assert('\0' == data4[274], "");
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data5) == 274, "");
+_Static_assert(' ' == data5[0], "");
+_Static_assert('_' == data5[11], "");
+_Static_assert('\n' == data5[273], "");
+_Static_assert(sizeof(data6) == 275, "");
+_Static_assert(' ' == data6[0], "");
+_Static_assert('_' == data6[11], "");
+_Static_assert('\n' == data6[273], "");
+_Static_assert('\0' == data6[274], "");
+_Static_assert(sizeof(data7) == 275, "");
+_Static_assert(' ' == data7[0], "");
+_Static_assert('_' == data7[11], "");
+_Static_assert('\n' == data7[273], "");
+_Static_assert('\0' == data7[274], "");
+_Static_assert(sizeof(data8) == 275, "");
+_Static_assert(' ' == data8[0], "");
+_Static_assert('_' == data8[11], "");
+_Static_assert('\n' == data8[273], "");
+_Static_assert('\0' == data8[274], "");
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+_Static_assert(sizeof(data9) == 274, "");
+_Static_assert(' ' == data9[0], "");
+_Static_assert('_' == data9[11], "");
+_Static_assert('\n' == data9[273], "");
+_Static_assert(sizeof(data10) == 275, "");
+_Static_assert(' ' == data10[1], "");
+_Static_assert('_' == data10[12], "");
+_Static_assert('\n' == data10[274], "");
+_Static_assert('\0' == data10[0], "");
+_Static_assert(sizeof(data11) == 275, "");
+_Static_assert(' ' == data11[1], "");
+_Static_assert('_' == data11[12], "");
+_Static_assert('\n' == data11[274], "");
+_Static_assert('\0' == data11[0], "");
+_Static_assert(sizeof(data12) == 275, "");
+_Static_assert(' ' == data12[1], "");
+_Static_assert('_' == data12[12], "");
+_Static_assert('\n' == data12[274], "");
+_Static_assert('\0' == data12[0], "");
+
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 000000000000000..3be4e1c2a6cf870
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data =
+#embed "single_byte.txt"
+;
+_Static_assert('a' == data[0]);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 000000000000000..5971a75ee000bbf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;}
+void g () {}
+void h (unsigned char x, int y) {(void)x; (void)y;}
+int i () {
+ return
+#embed <single_byte.txt>
+ ;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus
+template <int First, int Second>
+void j() {
+ static_assert(First == 'j', "");
+ static_assert(Second == 'k', "");
+}
+#endif
+
+void do_stuff() {
+ f(
+#embed <single_byte.txt>
+ );
+ g(
+#embed <media/empty>
+ );
+ h(
+#embed <jk.txt>
+ );
+ int r = i();
+ (void)r;
+#ifdef __cplusplus
+ j<
+#embed <jk.txt>
+ >(
+#embed <media/empty>
+ );
+#endif
+}
+// expected-no-diagnostics
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 3d96884749b32f4..8fcef706e916733 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -20,37 +20,43 @@
namespace llvm {
-template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+template <class InputBytes, class OutputContainer>
+void encodeBase64(InputBytes const &Bytes, OutputContainer &OutputBuffer) {
static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
- std::string Buffer;
- Buffer.resize(((Bytes.size() + 2) / 3) * 4);
+ const std::size_t IndexOffset = OutputBuffer.size();
+ OutputBuffer.resize(OutputBuffer.size() + (((Bytes.size() + 2) / 3) * 4));
size_t i = 0, j = 0;
for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) {
uint32_t x = ((unsigned char)Bytes[i] << 16) |
((unsigned char)Bytes[i + 1] << 8) |
(unsigned char)Bytes[i + 2];
- Buffer[j + 0] = Table[(x >> 18) & 63];
- Buffer[j + 1] = Table[(x >> 12) & 63];
- Buffer[j + 2] = Table[(x >> 6) & 63];
- Buffer[j + 3] = Table[x & 63];
+ OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+ OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+ OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+ OutputBuffer[IndexOffset + j + 3] = Table[x & 63];
}
if (i + 1 == Bytes.size()) {
uint32_t x = ((unsigned char)Bytes[i] << 16);
- Buffer[j + 0] = Table[(x >> 18) & 63];
- Buffer[j + 1] = Table[(x >> 12) & 63];
- Buffer[j + 2] = '=';
- Buffer[j + 3] = '=';
+ OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+ OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+ OutputBuffer[IndexOffset + j + 2] = '=';
+ OutputBuffer[IndexOffset + j + 3] = '=';
} else if (i + 2 == Bytes.size()) {
uint32_t x =
((unsigned char)Bytes[i] << 16) | ((unsigned char)Bytes[i + 1] << 8);
- Buffer[j + 0] = Table[(x >> 18) & 63];
- Buffer[j + 1] = Table[(x >> 12) & 63];
- Buffer[j + 2] = Table[(x >> 6) & 63];
- Buffer[j + 3] = '=';
+ OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+ OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+ OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+ OutputBuffer[IndexOffset + j + 3] = '=';
}
+}
+
+template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+ std::string Buffer;
+ encodeBase64(Bytes, Buffer);
return Buffer;
}
>From 77aad07644b135196511dfe1d60bc08617e9d72b Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 10:31:54 -0500
Subject: [PATCH 03/29] Update based on API changes in community
---
clang/lib/Parse/ParseExpr.cpp | 2 +-
clang/lib/Sema/SemaExpr.cpp | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 25da32ee0b88c1d..4c96b62b3e323bd 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2918,7 +2918,7 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
} else {
StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
StringRef Base64StrData = Base64Str->getBytes();
- if (Base64Str->getKind() != StringLiteral::Ordinary) {
+ if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
<< 0
<< "'__builtin_pp_embed' with valid base64 encoding that is an "
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index be1f22bc93dde7f..87626d6af05d7d0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17609,10 +17609,10 @@ ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
1, ArraySizeRawVal));
QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
- ArrayType::Normal, 0);
+ ArraySizeModifier::Normal, 0);
StringLiteral *BinaryDataLiteral = StringLiteral::Create(
Context, StringRef(BinaryData.data(), BinaryData.size()),
- StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation);
+ StringLiteralKind::Ordinary, false, ArrayTy, Base64DataLocation);
return new (Context)
PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
RPLoc, CurContext);
>From 1cca72573478b5572d10721e9c94f2aea2d7e394 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:14:26 -0500
Subject: [PATCH 04/29] We don't yet expose a libclang cursor for embed
expressions
---
clang/tools/libclang/CXCursor.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index fd03c48ba1a42aa..08f5830afaa9625 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::ObjCSubscriptRefExprClass:
case Stmt::RecoveryExprClass:
case Stmt::SYCLUniqueStableNameExprClass:
+ case Stmt::PPEmbedExprClass:
K = CXCursor_UnexposedExpr;
break;
>From cd6142dc5899dd55ca693665ea313521db750d74 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:24 -0500
Subject: [PATCH 05/29] Update preprocessor tests for new builtin macros
---
clang/test/Preprocessor/init-aarch64.c | 3 +++
clang/test/Preprocessor/init.c | 3 +++
2 files changed, 6 insertions(+)
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 2b7cc57f2303333..b666fa99f39b42f 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -262,6 +262,9 @@
// AARCH64-NEXT: #define __SIZE_WIDTH__ 64
// AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
// AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
// AARCH64-NEXT: #define __STDC_HOSTED__ 1
// AARCH64-NEXT: #define __STDC_UTF_16__ 1
// AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index a0a2879cb58c7fc..0f728a69c34e561 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1797,6 +1797,9 @@
// WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
// WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
// WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
// WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
// WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__
>From 495f1d49d3e88c294be43e752ef699c267f67f8c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:52 -0500
Subject: [PATCH 06/29] Fix logical think-o with the test
---
clang/test/Preprocessor/embed_single_entity.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
index 3be4e1c2a6cf870..8cbee2a93626152 100644
--- a/clang/test/Preprocessor/embed_single_entity.c
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 -embed-dir=%S/Inputs -verify
const char data =
#embed "single_byte.txt"
;
-_Static_assert('a' == data[0]);
+_Static_assert('b' == data);
// expected-no-diagnostics
>From 680c3798811c3df1c8e92181a79655b24349ebce Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:05:09 -0500
Subject: [PATCH 07/29] Fix -Wreorder diagnostics; NFC
---
clang/include/clang/Lex/PPEmbedParameters.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index 7b76d2d573c23bd..dfc835ecfc835af 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -27,7 +27,7 @@ class PPEmbedParameterOffset : public PPDirectiveParameter {
PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
SourceLocation End)
- : Offset(Offset), PPDirectiveParameter(Start, End) {}
+ : PPDirectiveParameter(Start, End), Offset(Offset) {}
};
/// Preprocessor standard embed parameter "limit"
@@ -37,7 +37,7 @@ class PPEmbedParameterLimit : public PPDirectiveParameter {
size_t Limit;
PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
- : Limit(Limit), PPDirectiveParameter(Start, End) {}
+ : PPDirectiveParameter(Start, End), Limit(Limit) {}
};
/// Preprocessor standard embed parameter "prefix"
@@ -48,7 +48,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
SourceLocation End)
- : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
/// Preprocessor standard embed parameter "suffix"
@@ -59,7 +59,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
SourceLocation End)
- : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
/// Preprocessor standard embed parameter "if_empty"
@@ -70,7 +70,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
SourceLocation End)
- : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+ : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
} // end namespace clang
>From a0f8278db25809e3fc397edaac909ef809931567 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:15:37 -0500
Subject: [PATCH 08/29] Clean up these constructors to take a SmallVectorImpl
This way we're not tied to a SmallVector<Token, 2> specifically in callers.
---
clang/include/clang/Lex/PPEmbedParameters.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index dfc835ecfc835af..f6de84bdc915148 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -46,7 +46,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
public:
SmallVector<Token, 2> Tokens;
- PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ PPEmbedParameterPrefix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
SourceLocation End)
: PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
@@ -57,7 +57,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
public:
SmallVector<Token, 2> Tokens;
- PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ PPEmbedParameterSuffix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
SourceLocation End)
: PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
@@ -68,7 +68,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
public:
SmallVector<Token, 2> Tokens;
- PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+ PPEmbedParameterIfEmpty(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
SourceLocation End)
: PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
};
>From 4d9ed9e2f4bd27013681461edda6768ebbb7aaa1 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:13:32 -0500
Subject: [PATCH 09/29] Fix a crash with argument parsing
If the user passes -fno-builtin, then the call to getValue() will
assert due to an out of bounds access. So we check to see which form
the user passes (-fno-builtin or -fno-builtin-pp_embed).
Additionally, we need to round trip the argument properly depending on
which form the user passed.
---
clang/lib/Frontend/CompilerInvocation.cpp | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index ce1341421bab694..6660a116dad8022 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4330,8 +4330,14 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
for (const auto &EmbedEntry : Opts.EmbedEntries)
GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
- if (Opts.NoBuiltinPPEmbed)
- GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+ if (Opts.NoBuiltinPPEmbed) {
+ // We need to figure out whether the user passed -fno-builtin or
+ // specifically disabled pp_embed. If NoBuiltin is true, we don't need to
+ // generate an arg because that disables everything. Otherwise, we assume
+ // the user passed -fno-builtin-pp_embed and generate that.
+ if (!LangOpts.NoBuiltin)
+ GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+ }
// Don't handle LexEditorPlaceholders. It is implied by the action that is
// generated elsewhere.
@@ -4432,10 +4438,12 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
// Can disable the internal embed builtin / token
for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
- StringRef Val = A->getValue();
- if (Val == "pp_embed") {
- Opts.NoBuiltinPPEmbed = true;
- }
+ // A value-less argument is plain -fno-builtin, which disables every
+ // builtin; otherwise only the exact name "pp_embed" is of interest. The
+ // option is only ever switched on here, never assigned false, so that a
+ // later unrelated -fno-builtin-<name> cannot silently re-enable the embed
+ // builtin after an earlier argument disabled it.
+ if (!A->getNumValues() || A->getValue() == StringRef("pp_embed"))
+   Opts.NoBuiltinPPEmbed = true;
}
// Always avoid lexing editor placeholders when we're just running the
>From 8a466f3354cbf862a3bc1edd71c32289f337ebb0 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:55:36 -0500
Subject: [PATCH 10/29] Back out unrelated CMake changes
---
clang/CMakeLists.txt | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 1b88905da3b8597..9b52c58be41e7f7 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,7 +300,6 @@ configure_file(
${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
# Add appropriate flags for GCC
-option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -308,7 +307,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
endif ()
# Enable -pedantic for Clang even if it's not enabled for LLVM.
- if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
+ if (NOT LLVM_ENABLE_PEDANTIC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
endif ()
>From a3d4b13f9dbd9d11bbd8f619de3ac888a880bf82 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:57:50 -0500
Subject: [PATCH 11/29] Remove a spurious #undef; NFC
---
clang/include/clang/Basic/TokenKinds.def | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 6b726463f0cdd31..613f6d64eb8bdc9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -991,7 +991,6 @@ ANNOTATION(repl_input_end)
#undef CXX11_KEYWORD
#undef KEYWORD
#undef PUNCTUATOR
-#undef BUILTINOK
#undef TOK
#undef C99_KEYWORD
#undef C23_KEYWORD
>From 7dad1be74cc40cbb1694d58e8f7553c8741634ec Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 15:12:17 -0500
Subject: [PATCH 12/29] Backing out more unnecessary CMake changes
---
llvm/CMakeLists.txt | 7 -------
llvm/cmake/modules/GetHostTriple.cmake | 6 +++---
2 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cb049ccb7d9c8cd..7ff3acd48304de7 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -780,13 +780,6 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
"Semicolon-separated list of components to include in libLLVM, or \"all\".")
endif()
-option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON)
-# Quiet down MSVC-style secure CRT warnings
-if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
- add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
-endif()
-
-
if(MSVC)
option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
# Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 828227f2f25a2f0..1be13bc01ab9b25 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
# Invokes config.guess
function( get_host_triple var )
- if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
+ if( MSVC )
if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
set( value "aarch64-pc-windows-msvc" )
elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
else()
set( value "powerpc-ibm-aix" )
endif()
- else()
+ else( MSVC )
if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
message(WARNING "unable to determine host target triple")
else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
endif( NOT TT_RV EQUAL 0 )
set( value ${TT_OUT} )
endif()
- endif()
+ endif( MSVC )
set( ${var} ${value} PARENT_SCOPE )
endfunction( get_host_triple var )
>From 29ac376978331a6453575004814cb8e9364bd933 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:16:21 -0500
Subject: [PATCH 13/29] Correct the logic for this diagnostic checking function
This fixes a few hundred failing test cases for me; still several left
failing though.
---
clang/lib/Sema/SemaExpr.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 87626d6af05d7d0..c932abf8d931906 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17801,11 +17801,11 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
bool SingleAllowed) {
PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
if (!PPEmbed)
- return true;
+ return false;
if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
E = ExpandSinglePPEmbedExpr(PPEmbed);
- return true;
+ return false;
}
StringRef LocationName = GetLocationName(PPEmbedContext);
@@ -17816,7 +17816,7 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
: "cannot use a preprocessor embed in ");
Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
<< DiagnosticMessage << 1 << LocationName;
- return false;
+ return true;
}
bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,
>From e4e28eb990098d8a203013d946dd5a4243a8fb0f Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:38:23 -0500
Subject: [PATCH 14/29] Fix think-o with test to get it to pass
---
clang/test/Preprocessor/embed_path_quote.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index 791cd9176ebe0ab..7e39d9be3b0a523 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -4,5 +4,5 @@ const char data[] = {
#embed "single_byte.txt"
};
_Static_assert(sizeof(data) == 1, "");
-_Static_assert('a' == data[0], "");
+_Static_assert('b' == data[0], "");
// expected-no-diagnostics
>From ab5f8c204d03bab9bd516c299a478b0d72467b01 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 08:50:21 -0500
Subject: [PATCH 15/29] Restore previous behavior; fixes two more failing test
cases
---
clang/lib/Lex/PPExpressions.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index dda5717afc699da..e0bd73e8680921c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -935,10 +935,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- const bool IsNonZero = ResVal.Val != 0;
const SourceRange ValRange = ResVal.getRange();
- return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
- ValRange};
+ return {std::move(ResVal.Val), false, DT.IncludedUndefinedIds, ValRange};
}
if (CheckForEoD) {
>From 9d5eadfc04ed7276bab79321294b6bff4f35bb85 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:37:59 -0500
Subject: [PATCH 16/29] Clean up the way we expose the __STDC_EMBED_*__ macros;
NFC
---
clang/include/clang/Lex/Preprocessor.h | 11 +++++----
clang/lib/Frontend/InitPreprocessor.cpp | 9 +++++---
clang/lib/Lex/PPMacroExpansion.cpp | 30 ++++++++++++-------------
3 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index ea461aba0611f0f..8db920ad2dc6610 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -122,6 +122,12 @@ enum MacroUse {
MU_Undef = 2
};
+enum class EmbedResult {
+ NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
+ Found = 1, // Corresponds to __STDC_EMBED_FOUND__
+ Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
+};
+
/// Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
@@ -211,9 +217,6 @@ class Preprocessor {
enum {
/// Maximum depth of \#includes.
MaxAllowedIncludeStackDepth = 200,
- VALUE__STDC_EMBED_NOT_FOUND__ = 0,
- VALUE__STDC_EMBED_FOUND__ = 1,
- VALUE__STDC_EMBED_EMPTY__ = 2,
};
// State that is set before the preprocessor begins.
@@ -2584,7 +2587,7 @@ class Preprocessor {
///
/// Returns predefined `__STDC_EMBED_*` macro values if
/// successful.
- int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
+ EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
/// Process a '__has_include("path")' expression.
///
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b7d084773b0a195..cc9c6733f442968 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -499,9 +499,12 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__STDC_UTF_32__", "1");
// __has_embed definitions
- Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
- Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
- Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+ Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+ Builder.defineMacro("__STDC_EMBED_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::Found)));
+ Builder.defineMacro("__STDC_EMBED_EMPTY__",
+ llvm::itostr(static_cast<int>(EmbedResult::Empty)));
if (LangOpts.ObjC)
Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b25faf8c873d389..a55bc719328ad62 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1272,7 +1272,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
/// Returns a filled optional with the value if successful; otherwise, empty.
-int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
// pedwarn for not being on C23
if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
@@ -1290,13 +1290,13 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
// Return a valid identifier token.
assert(Tok.is(tok::identifier));
Tok.setIdentifierInfo(II);
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
// Get '('. If we don't have a '(', try to form a header-name token.
do {
if (this->LexHeaderName(Tok)) {
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
} while (Tok.getKind() == tok::comment);
@@ -1308,19 +1308,19 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
// If the next token looks like a filename or the start of one,
// assume it is and process it as such.
if (Tok.isNot(tok::header_name)) {
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
} else {
// Save '(' location for possible missing ')' message.
LParenLoc = Tok.getLocation();
if (this->LexHeaderName(Tok)) {
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
}
if (Tok.isNot(tok::header_name)) {
Diag(Tok.getLocation(), diag::err_pp_expects_filename);
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
SourceLocation FilenameLoc = Tok.getLocation();
@@ -1331,10 +1331,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
if (!Params.Successful) {
if (Tok.isNot(tok::eod))
this->DiscardUntilEndOfDirective();
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
if (Params.UnrecognizedParams > 0) {
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
if (!Tok.is(tok::r_paren)) {
@@ -1342,7 +1342,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
<< II << tok::r_paren;
Diag(LParenLoc, diag::note_matching) << tok::l_paren;
DiscardUntilEndOfDirective();
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
SmallString<128> FilenameBuffer;
@@ -1364,7 +1364,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
}
if (!MaybeFileEntry) {
- return VALUE__STDC_EMBED_NOT_FOUND__;
+ return EmbedResult::NotFound;
}
size_t FileSize = MaybeFileEntry->getSize();
if (Params.MaybeLimitParam) {
@@ -1373,12 +1373,12 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
}
}
if (FileSize == 0) {
- return VALUE__STDC_EMBED_EMPTY__;
+ return EmbedResult::Empty;
}
if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
- return VALUE__STDC_EMBED_EMPTY__;
+ return EmbedResult::Empty;
}
- return VALUE__STDC_EMBED_FOUND__;
+ return EmbedResult::Found;
}
bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
@@ -1923,11 +1923,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
// file name string literal using angle brackets (<>) or
// double-quotes (""), optionally followed by a series of
// arguments similar to form like attributes.
- int Value = EvaluateHasEmbed(Tok, II);
+ EmbedResult Value = EvaluateHasEmbed(Tok, II);
if (Tok.isNot(tok::r_paren))
return;
- OS << Value;
+ OS << static_cast<int>(Value);
Tok.setKind(tok::numeric_constant);
} else if (II == Ident__has_warning) {
// The argument should be a parenthesized string literal.
>From f88a1aec9865fdd3cb44aaa45d4d141a6195854e Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:51:08 -0500
Subject: [PATCH 17/29] Fix a broken pp-trace test
The test needs to care about the three new predefined macros.
---
clang-tools-extra/test/pp-trace/pp-trace-macro.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
index 1d85607e86b7fff..7c2a231101070d7 100644
--- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
+++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
@@ -31,6 +31,15 @@ X
// CHECK: MacroNameTok: __STDC_UTF_32__
// CHECK-NEXT: MacroDirective: MD_Define
// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_NOT_FOUND__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_FOUND__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_EMPTY__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
// CHECK: - Callback: MacroDefined
// CHECK-NEXT: MacroNameTok: MACRO
// CHECK-NEXT: MacroDirective: MD_Define
>From e7ef292e0e61591eaf3bda238265f45a3e468e48 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 11:56:35 -0500
Subject: [PATCH 18/29] Remove __builtin_pp_embed as a builtin function; NFC
This is a weird builtin function that's more like __builtin_offsetof
in that it takes a type argument. Therefore, it's not really a function
call like other builtins (we wouldn't check its validity in
SemaChecking.cpp).
---
clang/include/clang/Basic/Builtins.def | 3 ---
1 file changed, 3 deletions(-)
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index fa3d83d1a34bec0..ec39e926889b936 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1770,9 +1770,6 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
// Arithmetic Fence: to prevent FP reordering and reassociation optimizations
LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
-// preprocessor embed builtin
-LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
-
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
>From 7c6bc7b776be54f7dca27ce34222c9ca7b1beda4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:00:02 -0500
Subject: [PATCH 19/29] Add a test for feature testing the builtin
---
clang/test/Preprocessor/embed_builtin.cpp | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 clang/test/Preprocessor/embed_builtin.cpp
diff --git a/clang/test/Preprocessor/embed_builtin.cpp b/clang/test/Preprocessor/embed_builtin.cpp
new file mode 100644
index 000000000000000..d2547fa0c3f668d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_builtin.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if !__has_builtin(__builtin_pp_embed)
+#error "Don't have __builtin_pp_embed?"
+#endif
>From 038c90d4e9dc2c17900064b7e059061165b6d993 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:22:00 -0500
Subject: [PATCH 20/29] Correct parsing behavior and add tests
There is likely more work to be done here to split parsing and semantic
concerns. This also pointed out an issue where __builtin_pp_embed seems
to have a non-void return type, but who knows what it actually returns
as a value.
---
.../clang/Basic/DiagnosticCommonKinds.td | 2 +-
clang/lib/Parse/ParseExpr.cpp | 118 +++++++++---------
clang/test/Parser/embed_builtin.cpp | 14 +++
3 files changed, 75 insertions(+), 59 deletions(-)
create mode 100644 clang/test/Parser/embed_builtin.cpp
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index b2e770b540944e3..6368f0ceeac3274 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -57,7 +57,7 @@ def err_expected_string_literal : Error<"expected string literal "
"for optional message in 'availability' attribute|"
"for %select{language name|source container name|USR}1 in "
"'external_source_symbol' attribute|"
- "as argument of '%1' attribute}0">;
+ "as argument of '%1' attribute|as the %ordinal1 argument}0">;
def err_builtin_pp_embed_invalid_argument : Error<
"invalid argument to '__builtin_pp_embed': %0">;
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 4c96b62b3e323bd..03d181586f83b5a 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2858,86 +2858,88 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
break;
}
case tok::kw___builtin_pp_embed: {
- SourceRange DataTyExprSourceRange{};
+ // __builtin_pp_embed( type-name , string-literal , string-literal )
+ SourceRange DataTyExprSourceRange;
TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+ if (DataTyExpr.isInvalid()) {
+ SkipUntil(tok::r_paren, StopAtSemi);
+ return ExprError();
+ }
+
if (ExpectAndConsume(tok::comma)) {
SkipUntil(tok::r_paren, StopAtSemi);
- Res = ExprError();
+ return ExprError();
}
- ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+ if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+ Diag(Tok, diag::err_expected_string_literal)
+ << /*as argument*/ 5 << /*second argument*/ 2;
+ SkipUntil(tok::r_paren, StopAtSemi);
+ return ExprError();
+ }
+ ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
- if (ExpectAndConsume(tok::comma)) {
+ if (FilenameArgExpr.isInvalid() || ExpectAndConsume(tok::comma)) {
SkipUntil(tok::r_paren, StopAtSemi);
- Res = ExprError();
+ return ExprError();
}
- ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+ if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+ Diag(Tok, diag::err_expected_string_literal)
+ << /*as argument*/ 5 << /*third argument*/ 3;
+ SkipUntil(tok::r_paren, StopAtSemi);
+ return ExprError();
+ }
+ ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
- if (Tok.isNot(tok::r_paren)) {
+ if (Base64ArgExpr.isInvalid() || Tok.isNot(tok::r_paren)) {
Diag(Tok, diag::err_expected) << tok::r_paren;
- Res = ExprError();
+ return ExprError();
}
const ASTContext &Context = Actions.getASTContext();
- QualType DataTy = Context.UnsignedCharTy;
+ QualType DataTy = DataTyExpr.get().get().getCanonicalType();
size_t TargetWidth = Context.getTypeSize(DataTy);
- if (DataTyExpr.isInvalid()) {
+ if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+ DataTy.getUnqualifiedType() != Context.CharTy) {
+ // TODO: check if is exactly the same as unsigned char
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "only 'char' and 'unsigned char' are supported";
Res = ExprError();
- } else {
- DataTy = DataTyExpr.get().get().getCanonicalType();
- TargetWidth = Context.getTypeSize(DataTy);
- if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
- DataTy.getUnqualifiedType() != Context.CharTy) {
- // TODO: check if is exactly the same as unsigned char
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "only 'char' and 'unsigned char' are supported";
- Res = ExprError();
- }
- if ((TargetWidth % CHAR_BIT) != 0) {
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "width of element type is not a multiple of host platform's "
- "CHAR_BIT!";
- Res = ExprError();
- }
}
-
- StringLiteral *FilenameLiteral = nullptr;
- if (FilenameArgExpr.isInvalid()) {
+ if ((TargetWidth % CHAR_BIT) != 0) {
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "width of element type is not a multiple of host platform's "
+ "CHAR_BIT!";
Res = ExprError();
- } else {
- FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
}
- std::vector<char> BinaryData{};
- if (Base64ArgExpr.isInvalid()) {
+ StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+ std::vector<char> BinaryData;
+ StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+ StringRef Base64StrData = Base64Str->getBytes();
+ if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
+ Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+ << 0
+ << "'__builtin_pp_embed' with valid base64 encoding that is an "
+ "ordinary \"...\" string";
+ }
+ const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+ Diag(Base64Str->getExprLoc(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "expected a valid base64 encoded string";
+ };
+ llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+ llvm::handleAllErrors(std::move(Err), OnDecodeError);
+ if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "size of data does not split evently into the number of bytes "
+ "requested";
Res = ExprError();
- } else {
- StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
- StringRef Base64StrData = Base64Str->getBytes();
- if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
- Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
- << 0
- << "'__builtin_pp_embed' with valid base64 encoding that is an "
- "ordinary \"...\" string";
- }
- const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
- Diag(Base64Str->getExprLoc(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "expected a valid base64 encoded string";
- };
- llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
- llvm::handleAllErrors(std::move(Err), OnDecodeError);
- if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "size of data does not split evently into the number of bytes "
- "requested";
- Res = ExprError();
- }
}
if (!Res.isInvalid()) {
diff --git a/clang/test/Parser/embed_builtin.cpp b/clang/test/Parser/embed_builtin.cpp
new file mode 100644
index 000000000000000..487c11c393ad0ee
--- /dev/null
+++ b/clang/test/Parser/embed_builtin.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+void parsing_diags() {
+ __builtin_pp_embed; // expected-error {{expected '(' after '__builtin_pp_embed'}}
+ __builtin_pp_embed(; // expected-error {{expected a type}}
+ __builtin_pp_embed(); // expected-error {{expected a type}}
+ __builtin_pp_embed(12); // expected-error {{expected a type}}
+ __builtin_pp_embed(int); // expected-error {{expected ','}}
+ __builtin_pp_embed(int, 12); // expected-error {{expected string literal as the 2nd argument}}
+ __builtin_pp_embed(int, "", 12); // expected-error {{expected string literal as the 3rd argument}}
+ __builtin_pp_embed(int, "", "", 12); // expected-error {{expected ')'}}
+ (void)__builtin_pp_embed(char, L"", ""); // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+ (void)__builtin_pp_embed(char, "", L""); // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+}
>From c204b7358f2fcd495d495831ea71baa67f693711 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:40:23 -0500
Subject: [PATCH 21/29] No longer expose the embed driver options to Flang
The options don't make sense outside of Clang currently.
---
clang/include/clang/Driver/Options.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index aef200cc5729279..91c6ff70cad7236 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -832,11 +832,11 @@ def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
Flags<[RenderJoined]>, Group<EmbedPath_Group>,
- Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+ Visibility<[ClangOption, CC1Option]>,
MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
Flags<[RenderJoined]>, Group<EmbedPath_Group>,
- Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+ Visibility<[ClangOption, CC1Option]>,
MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
def MD : Flag<["-"], "MD">, Group<M_Group>,
HelpText<"Write a depfile containing user and system headers">;
>From ec01bec24f4c71f3bd50ae717490db628cd1dde8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 07:32:29 -0500
Subject: [PATCH 22/29] Fix type mismatch that was upsetting the precommit CI bot
---
clang/lib/Lex/PPDirectives.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 2902d5da7bc5cf5..9d5d6dcdb7a8c2a 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1386,7 +1386,7 @@ void Preprocessor::HandleDirective(Token &Result) {
return HandleEmbedDirective(SavedHash.getLocation(), Result,
getCurrentFileLexer()
? getCurrentFileLexer()->getFileEntry()
- : nullptr);
+ : static_cast<FileEntry *>(nullptr));
case tok::pp_assert:
//isExtension = true; // FIXME: implement #assert
break;
>From f57334a078a20da3da4e327dbceb3dc83ad3a2fc Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:11:00 -0500
Subject: [PATCH 23/29] Fix misuse of Twine and add a test
The issue would previously manifest in -E output where we would print:
1>
instead of:
<built-in:embed:1>
---
clang/lib/Lex/PPDirectives.cpp | 18 ++++++------------
.../Preprocessor/embed_preprocess_to_file.c | 13 +++++++++++++
2 files changed, 19 insertions(+), 12 deletions(-)
create mode 100644 clang/test/Preprocessor/embed_preprocess_to_file.c
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9d5d6dcdb7a8c2a..695fca9f5157aaa 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3899,11 +3899,6 @@ void Preprocessor::HandleEmbedDirectiveNaive(
// particular.
EmbedBuffers.push_back("");
size_t EmbedBufferNumber = EmbedBuffers.size();
- std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
- llvm::Twine EmbedBufferName = [](const std::string &Number) {
- llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
- return PrefixNumber.concat(">");
- }(EmbedBufferNumberVal);
std::string &TargetEmbedBuffer = EmbedBuffers.back();
const size_t TotalSize = BinaryContents.size();
// In the future, this might change/improve.
@@ -3956,7 +3951,9 @@ void Preprocessor::HandleEmbedDirectiveNaive(
// Create faux-file and its ID, backed by a memory buffer.
std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
- llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+ llvm::MemoryBuffer::getMemBufferCopy(
+ TargetEmbedBuffer,
+ "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
assert(EmbedMemBuffer && "Cannot create predefined source buffer");
FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
assert(EmbedBufferFID.isValid() &&
@@ -4113,11 +4110,6 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
// particular.
EmbedBuffers.push_back("");
size_t EmbedBufferNumber = EmbedBuffers.size();
- std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
- llvm::Twine EmbedBufferName = [](const std::string &Number) {
- llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
- return PrefixNumber.concat(">");
- }(EmbedBufferNumberVal);
std::string &TargetEmbedBuffer = EmbedBuffers.back();
StringRef TypeName = "unsigned char";
const size_t TotalSize =
@@ -4147,7 +4139,9 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
TargetEmbedBuffer.append("\")");
// Create faux-file and its ID, backed by a memory buffer.
std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
- llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+ llvm::MemoryBuffer::getMemBufferCopy(
+ TargetEmbedBuffer,
+ "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
assert(EmbedMemBuffer && "Cannot create predefined source buffer");
FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
assert(EmbedBufferFID.isValid() &&
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 000000000000000..96447d4d6b11f7e
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -std=c23 -E -embed-dir=%S/Inputs | FileCheck %s
+
+// Ensure that we print out the correct data to the preprocessed file. Note,
+// #embed will do a base64 encoding of the file contents, so if art.txt changes,
+// this test will need to change accordingly as well.
+const char data[] = {
+#embed <media/art.txt>
+};
+
+// CHECK: # 1 "<built-in:embed:1>" 1
+// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
+// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
+};
>From 8ef8da333a4c1ddfea85e745546b06849d266228 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:14:00 -0500
Subject: [PATCH 24/29] Remove unused variable; NFC
---
clang/lib/Parse/ParseExpr.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 03d181586f83b5a..982520a30a35407 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2920,7 +2920,6 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
std::vector<char> BinaryData;
StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
- StringRef Base64StrData = Base64Str->getBytes();
if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
<< 0
>From a5517cbb33fc825dbdea037df8967971fe63727d Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:16:25 -0500
Subject: [PATCH 25/29] Fix a typo that snuck into this test
---
clang/test/Preprocessor/embed_preprocess_to_file.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 96447d4d6b11f7e..1706ac457e9224c 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -10,4 +10,3 @@ const char data[] = {
// CHECK: # 1 "<built-in:embed:1>" 1
// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
-};
>From 7f856ddef8f43b8c81d02ec2cee3c67e9a0420db Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Fri, 10 Nov 2023 09:01:09 -0500
Subject: [PATCH 26/29] Fix another type mismatch that was upsetting the precommit CI bot
---
clang/lib/Lex/PPMacroExpansion.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index a55bc719328ad62..3ff3055a17c4e90 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1356,7 +1356,7 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
assert(!Filename.empty());
const FileEntry *LookupFromFile =
this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
- : nullptr;
+ : static_cast<FileEntry *>(nullptr);
OptionalFileEntryRef MaybeFileEntry =
this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
LookupFromFile, nullptr, &RelativePath);
>From b8a57c23e5d36508c4c2b518d7f2fb555d7928fb Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 08:49:18 -0500
Subject: [PATCH 27/29] Fix new compile error from rebase; NFC
---
clang/lib/Frontend/DependencyFile.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 0fd10b2a177a0e3..b46a16282f22c9c 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -72,7 +72,6 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
/*FromModule*/ false,
/*IsSystem*/ false,
/*IsModuleFile*/ false,
- &PP.getFileManager(),
/*IsMissing*/ true);
// Files that actually exist are handled by FileChanged.
}
@@ -100,7 +99,6 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
DepCollector.maybeAddDependency(Filename,
/*FromModule=*/false, false,
/*IsModuleFile=*/false,
- &PP.getFileManager(),
/*IsMissing=*/false);
}
>From 6a6f813099b673660295f09d674fa02bf2efaca4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 13:19:37 -0500
Subject: [PATCH 28/29] Replace __builtin_pp_embed with annotation tokens
We do not want to have a builtin for embed because it poses too many
problems. For example, it allows for recursive embeds through:
__builtin_pp_embed(
#embed "file containing a file name.txt"
,
#embed "file containing base64 data.txt"
)
Instead, we'll use annotation tokens to pass information from the
preprocessor into the parser.
---
clang/include/clang/AST/Expr.h | 2 +-
clang/include/clang/Basic/TokenKinds.def | 5 +-
clang/include/clang/Lex/PreprocessorOptions.h | 4 -
clang/include/clang/Sema/Sema.h | 2 +-
clang/lib/AST/StmtPrinter.cpp | 7 +-
clang/lib/Frontend/CompilerInvocation.cpp | 19 --
clang/lib/Lex/PPDirectives.cpp | 127 +++++++-------
clang/lib/Parse/ParseExpr.cpp | 164 ++++++++----------
clang/lib/Sema/SemaDecl.cpp | 3 +-
clang/test/Parser/embed_builtin.cpp | 14 --
clang/test/Preprocessor/embed_builtin.cpp | 6 -
.../Preprocessor/embed_preprocess_to_file.c | 4 +-
12 files changed, 151 insertions(+), 206 deletions(-)
delete mode 100644 clang/test/Parser/embed_builtin.cpp
delete mode 100644 clang/test/Preprocessor/embed_builtin.cpp
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index f6d1fdd5bea2c72..6345faefa62ff26 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4810,7 +4810,7 @@ class SourceLocExpr final : public Expr {
friend class ASTStmtReader;
};
-/// Represents a function call to __builtin_pp_embed().
+/// Represents a #embed "expression".
class PPEmbedExpr final : public Expr {
SourceLocation BuiltinLoc, RParenLoc;
DeclContext *ParentContext;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index cd5f2016c002d4e..41f308b23ecbb6e 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -756,7 +756,6 @@ ALIAS("__char32_t" , char32_t , KEYCXX)
KEYWORD(__builtin_bit_cast , KEYALL)
KEYWORD(__builtin_available , KEYALL)
KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
-KEYWORD(__builtin_pp_embed , KEYALL)
// Keywords defined by Attr.td.
#ifndef KEYWORD_ATTRIBUTE
@@ -973,6 +972,10 @@ ANNOTATION(header_unit)
// Annotation for end of input in clang-repl.
ANNOTATION(repl_input_end)
+// Annotation for #embed
+ANNOTATION(embed_start)
+ANNOTATION(embed_end)
+
#undef PRAGMA_ANNOTATION
#undef ANNOTATION
#undef TESTING_KEYWORD
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 23f3458d79e0312..d0dac8c23ab0dea 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -170,10 +170,6 @@ class PreprocessorOptions {
/// User specified embed entries.
std::vector<std::string> EmbedEntries;
- /// Whether or not naive expansion should be used all the time for
- /// builtin embed
- bool NoBuiltinPPEmbed = false;
-
/// Whether the compiler instance should retain (i.e., not free)
/// the buffers associated with remapped files.
///
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 967f40a3e2de901..48eac54f36f676e 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6089,7 +6089,7 @@ class Sema final {
SourceLocation BuiltinLoc,
SourceLocation RPLoc);
- // __builtin_pp_embed()
+ // #embed
ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
SourceLocation Base64DataLocation,
SourceLocation RPLoc, StringLiteral *Filename,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 426de0696965ac0..fa429ed7c7a0493 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1147,9 +1147,10 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
}
void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) {
- OS << "__builtin_pp_embed(" << Node->getType() << ", "
- << Node->getFilenameStringLiteral()->getBytes() << ", \""
- << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")";
+ // This isn't yet implemented because the contents of the PPEmbedExpr are
+ // not generally retained in the AST. e.g., when used as an initializer, the
+ // expression will be converted into an InitListExpr, etc.
+ assert(false && "not yet implemented");
}
void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b7ed4bde112e328..25e73f805246b7a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4329,15 +4329,6 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
for (const auto &EmbedEntry : Opts.EmbedEntries)
GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
- if (Opts.NoBuiltinPPEmbed) {
- // We need to figure out whether the user passed -fno-builtins or
- // specifically disabled pp_embed. If NoBuiltin is true, we don't need to
- // generate an arg because that disables everything. Otherwise, we assume
- // the user passed -fno-builtin-pp_embed and generate that.
- if (!LangOpts.NoBuiltin)
- GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
- }
-
// Don't handle LexEditorPlaceholders. It is implied by the action that is
// generated elsewhere.
}
@@ -4435,16 +4426,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
Opts.EmbedEntries.push_back(std::string(Val));
}
- // Can disable the internal embed builtin / token
- for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
- bool NoBuiltinEmbed = false;
- if (A->getNumValues())
- NoBuiltinEmbed = A->getValue() == StringRef("pp_embed");
- else
- NoBuiltinEmbed = true; // All builtins are disabled.
- Opts.NoBuiltinPPEmbed = NoBuiltinEmbed;
- }
-
// Always avoid lexing editor placeholders when we're just running the
// preprocessor as we never want to emit the
// "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 695fca9f5157aaa..78269021484824d 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -4070,19 +4070,41 @@ static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
}
}
-void Preprocessor::HandleEmbedDirectiveBuiltin(
+void Preprocessor::HandleEmbedDirectiveImpl(
SourceLocation HashLoc, const Token &FilenameTok,
StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
const LexEmbedParametersResult &Params, StringRef BinaryContents,
const size_t TargetCharWidth) {
- // if it's empty, just process it like a normal expanded token stream
+ // Pass off the annotation token stream. The parser expects:
+ // if_empty-tokens or
+ // embed-annotation-start
+ // type-name string-literal , string-literal
+ // embed-annotation-stop
+ // where the type-name is the type used for each element to embed, the first
+ // string-literal is the resolved file name of the file we loaded contents
+ // from, and the second string-literal is the base64 encoded data we loaded
+ // from the file. The comma separation between string-literals prevents the
+ // literals from combining into a single string literal.
+ auto EmitToks = [&](ArrayRef<Token> Toks) {
+ size_t TokCount = Toks.size();
+ auto NewToks = std::make_unique<Token[]>(TokCount);
+ llvm::copy(Toks, NewToks.get());
+ EnterTokenStream(std::move(NewToks), TokCount, true, true);
+ };
if (BinaryContents.empty()) {
- HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
- BinaryContents, TargetCharWidth);
+ // If we have no binary contents, the only thing we need to emit are the
+ // if_empty tokens, if any.
+ // FIXME: this loses AST fidelity; nothing in the compiler will see that
+ // these tokens came from #embed.
+ if (Params.MaybeIfEmptyParam)
+ EmitToks(Params.MaybeIfEmptyParam->Tokens);
return;
}
- SmallVector<char, 2> BinaryPrefix{};
- SmallVector<char, 2> BinarySuffix{};
+
+ // FIXME: this is not correct; the standard allows *arbitrary* tokens in the
+ // prefix and suffix, but this only accounts for numeric literals and commas
+ // and nothing else.
+ SmallVector<char, 2> BinaryPrefix, BinarySuffix;
if (Params.MaybePrefixParam) {
// If we ahve a prefix, validate that it's a good fit for direct data
// embedded (and prepare to prepend it)
@@ -4095,7 +4117,7 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
}
}
if (Params.MaybeSuffixParam) {
- // If we ahve a prefix, validate that it's a good fit for direct data
+ // If we have a suffix, validate that it's a good fit for direct data
// embedding (and prepare to append it)
const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
@@ -4106,50 +4128,43 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
}
}
- // Load up a new embed buffer for this file and set of parameters in
- // particular.
- EmbedBuffers.push_back("");
- size_t EmbedBufferNumber = EmbedBuffers.size();
- std::string &TargetEmbedBuffer = EmbedBuffers.back();
- StringRef TypeName = "unsigned char";
- const size_t TotalSize =
- BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size();
- const size_t ReserveSize = // add up for necessary size:
- 19 // __builtin_pp_embed(
- + TypeName.size() // type-name
- + 2 // ,"
- + ResolvedFilename.size() // file-name
- + 3 // ","
- + (((TotalSize + 2) / 3) * 4) // base64-string
- + 2 // ");
- ;
- // Reserve appropriate size
- TargetEmbedBuffer.reserve(ReserveSize);
+ // Now emit the tokens for the embedded content itself.
+ std::string EncodedContents = llvm::encodeBase64(
+ (Twine(BinaryPrefix) + BinaryContents + Twine(BinarySuffix)).str());
+ auto SetAnnotTok = [](Token &Tok, tok::TokenKind Kind, SourceLocation Loc) {
+ Tok.startToken();
+ Tok.setKind(Kind);
+ Tok.setAnnotationRange(Loc);
+ };
+ auto SetStrTok = [&](Token &Tok, StringRef Contents, SourceLocation Loc) {
+ Tok.startToken();
+ Tok.setKind(tok::string_literal);
+ CreateString(("\"" + Contents + "\"").str(), Tok, Loc, Loc);
+ };
+ constexpr size_t TotalNumToks = 7;
+ auto Toks = std::make_unique<Token[]>(TotalNumToks);
- // Generate the look-alike source file
- TargetEmbedBuffer.append("__builtin_pp_embed(");
- TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
- TargetEmbedBuffer.append(",\"");
- TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size());
- TargetEmbedBuffer.append("\",\"");
- // include the prefix(...) and suffix(...) binary data in the total contents
- TripleEncodeBase64(
- StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents,
- StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer);
- TargetEmbedBuffer.append("\")");
- // Create faux-file and its ID, backed by a memory buffer.
- std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
- llvm::MemoryBuffer::getMemBufferCopy(
- TargetEmbedBuffer,
- "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
- assert(EmbedMemBuffer && "Cannot create predefined source buffer");
- FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
- assert(EmbedBufferFID.isValid() &&
- "Could not create FileID for #embed directive?");
- // Start parsing the look-alike source file for the embed directive and
- // pretend everything is normal
- // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
- EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
+ SetAnnotTok(Toks[0], tok::annot_embed_start, HashLoc);
+
+ Toks[1].startToken();
+ Toks[1].setLocation(HashLoc);
+ Toks[1].setKind(tok::kw_unsigned);
+
+ Toks[2].startToken();
+ Toks[2].setLocation(HashLoc);
+ Toks[2].setKind(tok::kw_char);
+
+ SetStrTok(Toks[3], ResolvedFilename, HashLoc);
+
+ Toks[4].startToken();
+ Toks[4].setLocation(HashLoc);
+ Toks[4].setKind(tok::comma);
+
+ SetStrTok(Toks[5], EncodedContents, HashLoc);
+
+ SetAnnotTok(Toks[6], tok::annot_embed_end, HashLoc);
+
+ EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
}
void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
@@ -4257,13 +4272,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
ParametersRange, MaybeFileRef, SearchPath,
RelativePath);
}
- if (PPOpts->NoBuiltinPPEmbed) {
- HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents,
- TargetCharWidth);
- } else {
- // emit a token directly, handle it internally.
- HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath,
- RelativePath, Params, BinaryContents,
- TargetCharWidth);
- }
+ HandleEmbedDirectiveImpl(HashLoc, FilenameTok, Filename, SearchPath,
+ RelativePath, Params, BinaryContents,
+ TargetCharWidth);
}
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 96bc542effde018..ba4aa59505d3199 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -808,7 +808,6 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
/// [MS] '__builtin_FUNCSIG' '(' ')'
/// [GNU] '__builtin_LINE' '(' ')'
/// [CLANG] '__builtin_COLUMN' '(' ')'
-/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
/// [GNU] '__builtin_source_location' '(' ')'
/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
/// [GNU] '__null'
@@ -1054,6 +1053,76 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
break;
}
+ case tok::annot_embed_start: {
+ // The preprocessor has already validated the syntax of the #embed
+ // directive and has produced this series of tokens, so we do not need to
+ // check for syntactic correctness. The form will be:
+ // type-name string-literal , string-literal
+ //
+ // where the type-name is the type of the elements to embed, the first
+ // string-literal is the file name the user passed to the directive, and
+ // the second string-literal is base64 encoded data from that file.
+ SourceLocation StartLoc = ConsumeAnnotationToken();
+ SourceRange DataTyExprSourceRange;
+ TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+ ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
+ // There is a comma separating the string literals to prevent them from
+ // combining into a single string literal.
+ ExpectAndConsume(tok::comma);
+ ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
+
+ const ASTContext &Context = Actions.getASTContext();
+ QualType DataTy = DataTyExpr.get().get().getCanonicalType();
+ size_t TargetWidth = Context.getTypeSize(DataTy);
+ if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+ DataTy.getUnqualifiedType() != Context.CharTy) {
+ // TODO: check if it is exactly the same as unsigned char
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "only 'char' and 'unsigned char' are supported";
+ Res = ExprError();
+ }
+ if ((TargetWidth % CHAR_BIT) != 0) {
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "width of element type is not a multiple of host platform's "
+ "CHAR_BIT!";
+ Res = ExprError();
+ }
+
+ StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+ std::vector<char> BinaryData;
+ StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+ if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
+ Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+ << 0
+ << "'__builtin_pp_embed' with valid base64 encoding that is an "
+ "ordinary \"...\" string";
+ }
+ const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+ Diag(Base64Str->getExprLoc(), diag::err_builtin_pp_embed_invalid_argument)
+ << "expected a valid base64 encoded string";
+ };
+ llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+ llvm::handleAllErrors(std::move(Err), OnDecodeError);
+ if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+ Diag(DataTyExprSourceRange.getBegin(),
+ diag::err_builtin_pp_embed_invalid_argument)
+ << "size of data does not split evently into the number of bytes "
+ "requested";
+ Res = ExprError();
+ }
+
+ // Now we expect the end annotation token.
+ assert(Tok.is(tok::annot_embed_end));
+ SourceLocation EndLoc = ConsumeAnnotationToken();
+ if (!Res.isInvalid()) {
+ Res = Actions.ActOnPPEmbedExpr(
+ StartLoc, Base64ArgExpr.get()->getExprLoc(), EndLoc, FilenameLiteral,
+ DataTy, std::move(BinaryData));
+ }
+ } break;
+
case tok::kw___super:
case tok::kw_decltype:
// Annotate the token and tail recurse.
@@ -1352,7 +1421,6 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
case tok::kw___builtin_FUNCSIG:
case tok::kw___builtin_LINE:
case tok::kw___builtin_source_location:
- case tok::kw___builtin_pp_embed:
if (NotPrimaryExpression)
*NotPrimaryExpression = true;
// This parses the complete suffix; we can return early.
@@ -2608,7 +2676,6 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
/// [MS] '__builtin_FUNCSIG' '(' ')'
/// [GNU] '__builtin_LINE' '(' ')'
/// [CLANG] '__builtin_COLUMN' '(' ')'
-/// [CLANG] '__builtin_pp_embed' '(' 'type-name ',' string-literal ',' string-literal ')'
/// [GNU] '__builtin_source_location' '(' ')'
/// [OCL] '__builtin_astype' '(' assignment-expression ',' type-name ')'
///
@@ -2877,97 +2944,6 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen());
break;
}
- case tok::kw___builtin_pp_embed: {
- // __builtin_pp_embed( type-name , string-literal , string-literal )
- SourceRange DataTyExprSourceRange;
- TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
-
- if (DataTyExpr.isInvalid()) {
- SkipUntil(tok::r_paren, StopAtSemi);
- return ExprError();
- }
-
- if (ExpectAndConsume(tok::comma)) {
- SkipUntil(tok::r_paren, StopAtSemi);
- return ExprError();
- }
-
- if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
- Diag(Tok, diag::err_expected_string_literal)
- << /*as argument*/ 5 << /*second argument*/ 2;
- SkipUntil(tok::r_paren, StopAtSemi);
- return ExprError();
- }
- ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
-
- if (FilenameArgExpr.isInvalid() || ExpectAndConsume(tok::comma)) {
- SkipUntil(tok::r_paren, StopAtSemi);
- return ExprError();
- }
-
- if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
- Diag(Tok, diag::err_expected_string_literal)
- << /*as argument*/ 5 << /*third argument*/ 3;
- SkipUntil(tok::r_paren, StopAtSemi);
- return ExprError();
- }
- ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
-
- if (Base64ArgExpr.isInvalid() || Tok.isNot(tok::r_paren)) {
- Diag(Tok, diag::err_expected) << tok::r_paren;
- return ExprError();
- }
-
- const ASTContext &Context = Actions.getASTContext();
- QualType DataTy = DataTyExpr.get().get().getCanonicalType();
- size_t TargetWidth = Context.getTypeSize(DataTy);
- if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
- DataTy.getUnqualifiedType() != Context.CharTy) {
- // TODO: check if is exactly the same as unsigned char
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "only 'char' and 'unsigned char' are supported";
- Res = ExprError();
- }
- if ((TargetWidth % CHAR_BIT) != 0) {
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "width of element type is not a multiple of host platform's "
- "CHAR_BIT!";
- Res = ExprError();
- }
-
- StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
- std::vector<char> BinaryData;
- StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
- if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
- Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
- << 0
- << "'__builtin_pp_embed' with valid base64 encoding that is an "
- "ordinary \"...\" string";
- }
- const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
- Diag(Base64Str->getExprLoc(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "expected a valid base64 encoded string";
- };
- llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
- llvm::handleAllErrors(std::move(Err), OnDecodeError);
- if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
- Diag(DataTyExprSourceRange.getBegin(),
- diag::err_builtin_pp_embed_invalid_argument)
- << "size of data does not split evently into the number of bytes "
- "requested";
- Res = ExprError();
- }
-
- if (!Res.isInvalid()) {
- Res = Actions.ActOnPPEmbedExpr(
- StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(),
- FilenameLiteral, DataTy, std::move(BinaryData));
- }
- break;
- }
}
if (Res.isInvalid())
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6939252078674db..70dc6ad26abec27 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13371,7 +13371,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
// Expand the list in-place immediately, let the natural work take hold
Init = ExpandSinglePPEmbedExpr(PPEmbed);
} else {
- // `__builtin_pp_embed( ... )` only produces 2 or more values.
+ // #embed only produces 2 or more values.
+ // FIXME: still uses the old builtin name.
Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
<< "'__builtin_pp_embed'";
RealDecl->setInvalidDecl();
diff --git a/clang/test/Parser/embed_builtin.cpp b/clang/test/Parser/embed_builtin.cpp
deleted file mode 100644
index 487c11c393ad0ee..000000000000000
--- a/clang/test/Parser/embed_builtin.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-
-void parsing_diags() {
- __builtin_pp_embed; // expected-error {{expected '(' after '__builtin_pp_embed'}}
- __builtin_pp_embed(; // expected-error {{expected a type}}
- __builtin_pp_embed(); // expected-error {{expected a type}}
- __builtin_pp_embed(12); // expected-error {{expected a type}}
- __builtin_pp_embed(int); // expected-error {{expected ','}}
- __builtin_pp_embed(int, 12); // expected-error {{expected string literal as the 2nd argument}}
- __builtin_pp_embed(int, "", 12); // expected-error {{expected string literal as the 3rd argument}}
- __builtin_pp_embed(int, "", "", 12); // expected-error {{expected ')'}}
- (void)__builtin_pp_embed(char, L"", ""); // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
- (void)__builtin_pp_embed(char, "", L""); // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
-}
diff --git a/clang/test/Preprocessor/embed_builtin.cpp b/clang/test/Preprocessor/embed_builtin.cpp
deleted file mode 100644
index d2547fa0c3f668d..000000000000000
--- a/clang/test/Preprocessor/embed_builtin.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-// expected-no-diagnostics
-
-#if !__has_builtin(__builtin_pp_embed)
-#error "Don't have __builtin_pp_embed?"
-#endif
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 1706ac457e9224c..314cd823e577044 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -7,6 +7,4 @@ const char data[] = {
#embed <media/art.txt>
};
-// CHECK: # 1 "<built-in:embed:1>" 1
-// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
-// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
+// CHECK: "{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg=="
>From c7e1304bc5db1e09bae4d2d70c0cdd8bfef768ab Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 13:24:18 -0500
Subject: [PATCH 29/29] Formatting changes; NFC
---
clang/include/clang/Lex/Preprocessor.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 8db920ad2dc6610..1d7d2a1e62c2356 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2743,13 +2743,13 @@ class Preprocessor {
const LexEmbedParametersResult &Params,
StringRef BinaryContents,
const size_t TargetCharWidth);
- void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc,
- const Token &FilenameTok,
- StringRef ResolvedFilename,
- StringRef SearchPath, StringRef RelativePath,
- const LexEmbedParametersResult &Params,
- StringRef BinaryContents,
- const size_t TargetCharWidth);
+ void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
+ const Token &FilenameTok,
+ StringRef ResolvedFilename,
+ StringRef SearchPath, StringRef RelativePath,
+ const LexEmbedParametersResult &Params,
+ StringRef BinaryContents,
+ const size_t TargetCharWidth);
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
More information about the flang-commits
mailing list