[libc] [clang] [lld] [lldb] [libcxx] [libcxxabi] [compiler-rt] [llvm] [clang-tools-extra] [flang] [libunwind] ✨ [Sema, Lex, Parse] Preprocessor embed in C and C++ (and Obj-C and Obj-C++ by-proxy) (PR #68620)

Aaron Ballman via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 30 08:19:49 PST 2023


https://github.com/AaronBallman updated https://github.com/llvm/llvm-project/pull/68620

>From 7050c932f63f9cb9e94636b287887f8241083117 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Thu, 28 Sep 2023 18:31:34 -0400
Subject: [PATCH 01/50] =?UTF-8?q?=E2=9C=A8=20[Sema,=20Driver,=20Lex,=20Fro?=
 =?UTF-8?q?ntend]=20Implement=20naive=20#embed=20for=20C23=20and=20C++26.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🛠 [Frontend] Ensure commas inserted by #embed are properly serialized to output
---
 clang/CMakeLists.txt                          |   3 +-
 clang/include/clang/Basic/Builtins.def        |   3 +
 clang/include/clang/Basic/DiagnosticGroups.td |   6 +
 .../include/clang/Basic/DiagnosticLexKinds.td |  24 +-
 clang/include/clang/Basic/FileManager.h       |   8 +-
 clang/include/clang/Basic/TokenKinds.def      |   7 +
 clang/include/clang/Driver/Options.td         |  16 +
 .../Frontend/PreprocessorOutputOptions.h      |   2 +
 clang/include/clang/Lex/PPCallbacks.h         |  77 ++-
 clang/include/clang/Lex/Preprocessor.h        |  66 ++-
 clang/include/clang/Lex/PreprocessorOptions.h |   7 +
 clang/lib/Basic/FileManager.cpp               |   8 +-
 clang/lib/Basic/IdentifierTable.cpp           |   3 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   5 +-
 clang/lib/Format/FormatToken.h                |   2 +
 clang/lib/Format/TokenAnnotator.cpp           |  28 +
 clang/lib/Frontend/CompilerInvocation.cpp     |  19 +
 clang/lib/Frontend/DependencyFile.cpp         |  29 +
 clang/lib/Frontend/DependencyGraph.cpp        |  43 +-
 clang/lib/Frontend/InitPreprocessor.cpp       |   7 +
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  25 +-
 .../Frontend/Rewrite/InclusionRewriter.cpp    |  13 +
 clang/lib/Lex/PPCallbacks.cpp                 |  11 -
 clang/lib/Lex/PPDirectives.cpp                | 500 ++++++++++++++++++
 clang/lib/Lex/PPExpressions.cpp               |  44 +-
 clang/lib/Lex/PPMacroExpansion.cpp            | 120 +++++
 clang/test/Preprocessor/Inputs/jk.txt         |   1 +
 clang/test/Preprocessor/Inputs/media/art.txt  |   9 +
 clang/test/Preprocessor/Inputs/media/empty    |   0
 .../test/Preprocessor/Inputs/single_byte.txt  |   1 +
 clang/test/Preprocessor/embed___has_embed.c   |  34 ++
 .../embed___has_embed_supported.c             |  24 +
 .../test/Preprocessor/embed_feature_test.cpp  |  13 +
 .../test/Preprocessor/embed_file_not_found.c  |   4 +
 clang/test/Preprocessor/embed_init.c          |  28 +
 .../Preprocessor/embed_parameter_if_empty.c   |  16 +
 .../test/Preprocessor/embed_parameter_limit.c |  15 +
 .../Preprocessor/embed_parameter_offset.c     |  15 +
 .../Preprocessor/embed_parameter_prefix.c     |  15 +
 .../Preprocessor/embed_parameter_suffix.c     |  15 +
 .../embed_parameter_unrecognized.c            |   8 +
 clang/test/Preprocessor/embed_path_chevron.c  |   8 +
 clang/test/Preprocessor/embed_path_quote.c    |   8 +
 clang/test/Preprocessor/single_byte.txt       |   1 +
 llvm/CMakeLists.txt                           |   7 +
 llvm/cmake/modules/GetHostTriple.cmake        |   6 +-
 46 files changed, 1264 insertions(+), 40 deletions(-)
 create mode 100644 clang/test/Preprocessor/Inputs/jk.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/empty
 create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt
 create mode 100644 clang/test/Preprocessor/embed___has_embed.c
 create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c
 create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp
 create mode 100644 clang/test/Preprocessor/embed_file_not_found.c
 create mode 100644 clang/test/Preprocessor/embed_init.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c
 create mode 100644 clang/test/Preprocessor/embed_path_chevron.c
 create mode 100644 clang/test/Preprocessor/embed_path_quote.c
 create mode 100644 clang/test/Preprocessor/single_byte.txt

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 9b52c58be41e7f7..1b88905da3b8597 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,6 +300,7 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
 
 # Add appropriate flags for GCC
+option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
   if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -307,7 +308,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   endif ()
 
   # Enable -pedantic for Clang even if it's not enabled for LLVM.
-  if (NOT LLVM_ENABLE_PEDANTIC)
+  if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
   endif ()
 
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 6ea8484606cfd5d..0dfc6456daf059a 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1766,6 +1766,9 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
 // Arithmetic Fence: to prevent FP reordering and reassociation optimizations
 LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
 
+// preprocessor embed builtin
+LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
+
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 0b09c002191848a..89f6715cebfdc0d 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -708,6 +708,12 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">;
 def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>;
 def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
 def FinalMacro : DiagGroup<"final-macro">;
+// Warnings about unknown preprocessor parameters (e.g. `#embed` and extensions)
+def UnsupportedDirective : DiagGroup<"unsupported-directive">;
+def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">;
+def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">;
+def DirectiveParameters : DiagGroup<"directive-parameters",
+    [UnknownDirectiveParameters, IgnoredDirectiveParameters]>;
 
 // Just silence warnings about -Wstrict-aliasing for now.
 def : DiagGroup<"strict-aliasing=0">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 940cca67368492f..4490f40806b0345 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -422,6 +422,22 @@ def warn_cxx23_compat_warning_directive : Warning<
 def warn_c23_compat_warning_directive : Warning<
   "#warning is incompatible with C standards before C23">,
   InGroup<CPre23Compat>, DefaultIgnore;
+def warn_c23_pp_embed : Warning<
+  "#embed is a C23 extension">,
+  InGroup<CPre23Compat>,
+  DefaultIgnore;
+def warn_c23_pp_has_embed : Warning<
+  "'__has_embed' is a C23 extension">,
+  InGroup<CPre23Compat>,
+  DefaultIgnore;
+def warn_cxx26_pp_embed : Warning<
+  "#embed is a C++26 extension">,
+  InGroup<CXXPre26Compat>,
+  DefaultIgnore;
+def warn_cxx26_pp_has_embed : Warning<
+  "'__has_embed' is a C++26 extension">,
+  InGroup<CXXPre26Compat>,
+  DefaultIgnore;
 
 def ext_pp_extra_tokens_at_eol : ExtWarn<
   "extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
@@ -483,7 +499,13 @@ def ext_pp_gnu_line_directive : Extension<
 def err_pp_invalid_directive : Error<
   "invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
 def warn_pp_invalid_directive : Warning<
-  err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
+  err_pp_invalid_directive.Summary>,
+  InGroup<UnsupportedDirective>;
+def warn_pp_unknown_parameter_ignored : Warning<
+  "unknown%select{| embed}0 preprocessor parameter '%1' ignored">,
+  InGroup<UnknownDirectiveParameters>;
+def err_pp_unsupported_directive : Error<
+  "unsupported%select{| embed}0 directive: %1">;
 def err_pp_directive_required : Error<
   "%0 must be used within a preprocessing directive">;
 def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal;
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 56cb093dd8c376f..c757f8775b425e9 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -276,11 +276,13 @@ class FileManager : public RefCountedBase<FileManager> {
   /// MemoryBuffer if successful, otherwise returning null.
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(FileEntryRef Entry, bool isVolatile = false,
-                   bool RequiresNullTerminator = true);
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt);
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(StringRef Filename, bool isVolatile = false,
-                   bool RequiresNullTerminator = true) {
-    return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt) {
+    return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
                                 RequiresNullTerminator);
   }
 
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 94db56a9fd5d78c..19a66fbb0731194 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -126,6 +126,9 @@ PPKEYWORD(error)
 // C99 6.10.6 - Pragma Directive.
 PPKEYWORD(pragma)
 
+// C23 & C++26 #embed
+PPKEYWORD(embed)
+
 // GNU Extensions.
 PPKEYWORD(import)
 PPKEYWORD(include_next)
@@ -151,6 +154,10 @@ TOK(eod)                 // End of preprocessing directive (end of line inside a
                          // directive).
 TOK(code_completion)     // Code completion marker
 
+// #embed speed support
+TOK(builtin_embed)
+
+
 // C99 6.4.9: Comments.
 TOK(comment)             // Comment (only in -E -C[C] mode)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3f2058a5d4650ca..a77a1a5e9aad981 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -114,6 +114,11 @@ def IncludePath_Group : OptionGroup<"<I/i group>">, Group<Preprocessor_Group>,
                         DocBrief<[{
 Flags controlling how ``#include``\s are resolved to files.}]>;
 
+def EmbedPath_Group : OptionGroup<"<Embed group>">, Group<Preprocessor_Group>,
+                        DocName<"Embed path management">,
+                        DocBrief<[{
+Flags controlling how ``#embed``\s and similar are resolved to files.}]>;
+
 def I_Group : OptionGroup<"<I group>">, Group<IncludePath_Group>, DocFlatten;
 def i_Group : OptionGroup<"<i group>">, Group<IncludePath_Group>, DocFlatten;
 def clang_i_Group : OptionGroup<"<clang i group>">, Group<i_Group>, DocFlatten;
@@ -816,6 +821,14 @@ will be ignored}]>;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     Visibility<[ClangOption, FlangOption]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
+def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
+    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
+def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
+    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;
 def MMD : Flag<["-"], "MMD">, Group<M_Group>,
@@ -1353,6 +1366,9 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
 def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Print include directives in -E mode in addition to normal output">,
   MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
+def dE : Flag<["-"], "dE">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Print embed directives in -E mode in addition to normal output">,
+  MarshallingInfoFlag<PreprocessorOutputOpts<"ShowEmbedDirectives">>;
 def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Print macro definitions in -E mode instead of normal output">;
 def dead__strip : Flag<["-"], "dead_strip">;
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index db2ec9f2ae20698..3e36db3f8ce46ea 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,6 +22,7 @@ class PreprocessorOutputOptions {
   unsigned ShowMacroComments : 1;  ///< Show comments, even in macros.
   unsigned ShowMacros : 1;         ///< Print macro definitions.
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
+  unsigned ShowEmbedDirectives : 1;  ///< Print embeds, etc. within preprocessed output.
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
   unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
@@ -37,6 +38,7 @@ class PreprocessorOutputOptions {
     ShowMacroComments = 0;
     ShowMacros = 0;
     ShowIncludeDirectives = 0;
+    ShowEmbedDirectives = 0;
     RewriteIncludes = 0;
     RewriteImports = 0;
     MinimizeWhitespace = 0;
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index 94f96cf9c512541..921bf159ead570d 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -83,6 +83,47 @@ class PPCallbacks {
                            const Token &FilenameTok,
                            SrcMgr::CharacteristicKind FileType) {}
 
+  /// Callback invoked whenever the preprocessor cannot find a file for an
+  /// embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \returns true to indicate that the preprocessor should skip this file
+  /// and not issue any diagnostic.
+  virtual bool EmbedFileNotFound(StringRef FileName) { return false; }
+
+  /// Callback invoked whenever an embed directive has been processed,
+  /// regardless of whether the embed will actually find a file.
+  ///
+  /// \param HashLoc The location of the '#' that starts the embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \param IsAngled Whether the file name was enclosed in angle brackets;
+  /// otherwise, it was enclosed in quotes.
+  ///
+  /// \param FilenameRange The character range of the quotes or angle brackets
+  /// for the written file name.
+  ///
+  /// \param ParametersRange The character range of the embed parameters. An
+  /// empty range if there were no parameters.
+  ///
+  /// \param File The actual file that may be embedded by this embed directive.
+  ///
+  /// \param SearchPath Contains the search path which was used to find the file
+  /// in the file system. If the file was found via an absolute path,
+  /// SearchPath will be empty.
+  ///
+  /// \param RelativePath The path relative to SearchPath, at which the resource
+  /// file was found. This is equal to FileName.
+  virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+                              bool IsAngled, CharSourceRange FilenameRange,
+                              CharSourceRange ParametersRange,
+                              OptionalFileEntryRef File, StringRef SearchPath,
+                              StringRef RelativePath) {}
+
   /// Callback invoked whenever the preprocessor cannot find a file for an
   /// inclusion directive.
   ///
@@ -330,11 +371,15 @@ class PPCallbacks {
                        SourceRange Range) {
   }
 
+  /// Hook called when a '__has_embed' directive is read.
+  virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                        OptionalFileEntryRef File) {}
+
   /// Hook called when a '__has_include' or '__has_include_next' directive is
   /// read.
   virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
                           OptionalFileEntryRef File,
-                          SrcMgr::CharacteristicKind FileType);
+                          SrcMgr::CharacteristicKind FileType) {}
 
   /// Hook called when a source range is skipped.
   /// \param Range The SourceRange that was skipped. The range begins at the
@@ -461,6 +506,25 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->FileSkipped(SkippedFile, FilenameTok, FileType);
   }
 
+  bool EmbedFileNotFound(StringRef FileName) override {
+    bool Skip = First->EmbedFileNotFound(FileName);
+    // Always forward to the second callback as well, even when the first one
+    // already asked to skip the file, so both observers see the event.
+    Skip |= Second->EmbedFileNotFound(FileName);
+    return Skip;
+  }
+
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
+    First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+                          ParametersRange, File, SearchPath, RelativePath);
+    Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+                           ParametersRange, File, SearchPath, RelativePath);
+  }
+
   bool FileNotFound(StringRef FileName) override {
     bool Skip = First->FileNotFound(FileName);
     // Make sure to invoke the second callback, no matter if the first already
@@ -561,9 +625,18 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->PragmaDiagnostic(Loc, Namespace, mapping, Str);
   }
 
+  void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                OptionalFileEntryRef File) override {
+    First->HasEmbed(Loc, FileName, IsAngled, File);
+    Second->HasEmbed(Loc, FileName, IsAngled, File);
+  }
+
   void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
                   OptionalFileEntryRef File,
-                  SrcMgr::CharacteristicKind FileType) override;
+                  SrcMgr::CharacteristicKind FileType) override {
+    First->HasInclude(Loc, FileName, IsAngled, File, FileType);
+    Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
+  }
 
   void PragmaOpenCLExtension(SourceLocation NameLoc, const IdentifierInfo *Name,
                              SourceLocation StateLoc, unsigned State) override {
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 18d88407ae12c90..7470bf5882730cb 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -31,6 +31,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -53,6 +54,7 @@
 #include <optional>
 #include <string>
 #include <utility>
+#include <variant>
 #include <vector>
 
 namespace llvm {
@@ -165,6 +167,7 @@ class Preprocessor {
   IdentifierInfo *Ident__has_builtin;              // __has_builtin
   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
   IdentifierInfo *Ident__has_attribute;            // __has_attribute
+  IdentifierInfo *Ident__has_embed;                // __has_embed
   IdentifierInfo *Ident__has_include;              // __has_include
   IdentifierInfo *Ident__has_include_next;         // __has_include_next
   IdentifierInfo *Ident__has_warning;              // __has_warning
@@ -206,7 +209,10 @@ class Preprocessor {
 
   enum {
     /// Maximum depth of \#includes.
-    MaxAllowedIncludeStackDepth = 200
+    MaxAllowedIncludeStackDepth = 200,
+    VALUE__STDC_EMBED_NOT_FOUND__ = 0,
+    VALUE__STDC_EMBED_FOUND__ = 1,
+    VALUE__STDC_EMBED_EMPTY__ = 2,
   };
 
   // State that is set before the preprocessor begins.
@@ -1728,6 +1734,22 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
+  struct LexEmbedParametersResult {
+    bool Successful;
+    std::optional<size_t> MaybeLimitParam;
+    std::optional<size_t> MaybeOffsetParam;
+    std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
+    std::optional<SmallVector<Token, 2>> MaybePrefixParam;
+    std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
+    int UnrecognizedParams;
+    SourceLocation StartLoc;
+    SourceLocation EndLoc;
+  };
+
+  LexEmbedParametersResult LexEmbedParameters(Token &Current,
+                                              bool InHasEmbed = false,
+                                              bool DiagnoseUnknown = true);
+
   bool LexAfterModuleImport(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
@@ -2413,6 +2435,17 @@ class Preprocessor {
              bool *IsFrameworkFound, bool SkipCache = false,
              bool OpenFile = true, bool CacheFailures = true);
 
+  /// Given a "foo" or \<foo> reference, look up the indicated embed resource.
+  ///
+  /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
+  /// reference was written using <> (true) rather than "" (false).
+  OptionalFileEntryRef
+  LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+                  bool OpenFile,
+                  const FileEntry *LookupFromFile = nullptr,
+                  SmallVectorImpl<char> *SearchPath = nullptr,
+                  SmallVectorImpl<char> *RelativePath = nullptr);
+
   /// Return true if we're in the top-level file, not in a \#include.
   bool isInPrimaryFile() const;
 
@@ -2517,6 +2550,9 @@ class Preprocessor {
   /// Information about the result for evaluating an expression for a
   /// preprocessor directive.
   struct DirectiveEvalResult {
+    /// The integral value of the expression.
+    std::optional<llvm::APSInt> Value;
+
     /// Whether the expression was evaluated as true or not.
     bool Conditional;
 
@@ -2531,7 +2567,24 @@ class Preprocessor {
   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
   ///
   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
-  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  bool CheckForEoD = true,
+                                                  bool Parenthesized = false);
+
+  /// Evaluate an integer constant expression that may occur after a
+  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
+  ///
+  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  Token &Tok,
+                                                  bool CheckForEoD = true,
+                                                  bool Parenthesized = false);
+
+  /// Process a '__has_embed("path" [, ...])' expression.
+  ///
+  /// Returns predefined `__STDC_EMBED_*` macro values if
+  /// successful.
+  int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
 
   /// Process a '__has_include("path")' expression.
   ///
@@ -2679,6 +2732,15 @@ class Preprocessor {
       const FileEntry *LookupFromFile, StringRef &LookupFilename,
       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
+  // Binary data inclusion
+  void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+                            const FileEntry *LookupFromFile = nullptr);
+  void HandleEmbedDirectiveNaive(
+      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+      StringRef BinaryContents, const size_t TargetCharWidth);
+  void HandleEmbedDirectiveBuiltin(
+      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+      StringRef BinaryContents, const size_t TargetCharWidth);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 058194bcde72e51..23f3458d79e0312 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -167,6 +167,13 @@ class PreprocessorOptions {
   /// of the specified memory buffer (the second part of each pair).
   std::vector<std::pair<std::string, llvm::MemoryBuffer *>> RemappedFileBuffers;
 
+  /// User specified embed entries.
+  std::vector<std::string> EmbedEntries;
+
+  /// Whether or not naive expansion should be used all the time for
+  /// builtin embed
+  bool NoBuiltinPPEmbed = false;
+
   /// Whether the compiler instance should retain (i.e., not free)
   /// the buffers associated with remapped files.
   ///
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index d16626b10652136..e0e80b5e0fbedbe 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -537,13 +537,19 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
 
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
-                              bool RequiresNullTerminator) {
+                              bool RequiresNullTerminator,
+                              std::optional<int64_t> MaybeLimit) {
   const FileEntry *Entry = &FE.getFileEntry();
   // If the content is living on the file entry, return a reference to it.
   if (Entry->Content)
     return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef());
 
   uint64_t FileSize = Entry->getSize();
+
+  if (MaybeLimit)
+    FileSize = *MaybeLimit;
+
+
   // If there's a high enough chance that the file have changed since we
   // got its size, force a stat before opening it.
   if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index e5599d545541085..d2b5426d27bb3b2 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -423,7 +423,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // case values).  Note that this depends on 'if' being null terminated.
 
 #define HASH(LEN, FIRST, THIRD) \
-  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
+  (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
 #define CASE(LEN, FIRST, THIRD, NAME) \
   case HASH(LEN, FIRST, THIRD): \
     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,6 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
+  CASE( 5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b91126ebed0186c..fc2f749a34fc471 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map});
+                   options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
@@ -8182,6 +8182,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
+  // Pass along any -embed-dir or similar options so we get proper embed paths.
+  Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group);
+
   // Determine the original source input.
   auto FindSource = [](const Action *S) -> const Action * {
     while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 606e9e790ad833b..232626e783e1b7d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -1008,6 +1008,7 @@ struct AdditionalKeywords {
     kw_synchronized = &IdentTable.get("synchronized");
     kw_throws = &IdentTable.get("throws");
     kw___except = &IdentTable.get("__except");
+    kw___has_embed = &IdentTable.get("__has_embed");
     kw___has_include = &IdentTable.get("__has_include");
     kw___has_include_next = &IdentTable.get("__has_include_next");
 
@@ -1305,6 +1306,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_NS_ERROR_ENUM;
   IdentifierInfo *kw_NS_OPTIONS;
   IdentifierInfo *kw___except;
+  IdentifierInfo *kw___has_embed;
   IdentifierInfo *kw___has_include;
   IdentifierInfo *kw___has_include_next;
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 543c119620bf28f..e405a9085951dc0 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1400,6 +1400,9 @@ class AnnotatingParser {
                        Keywords.kw___has_include_next)) {
         parseHasInclude();
       }
+      else if (Tok->is(Keywords.kw___has_embed)) {
+        parseHasEmbed();
+      }
       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
           Tok->Next->isNot(tok::l_paren)) {
         Tok->setType(TT_CSharpGenericTypeConstraint);
@@ -1464,6 +1467,21 @@ class AnnotatingParser {
     }
   }
 
+  void parseEmbedDirective() {
+    if (CurrentToken && CurrentToken->is(tok::less)) {
+      next();
+      while (CurrentToken) {
+        // Mark tokens up to the trailing line comments as implicit string
+        // literals.
+        if (CurrentToken->isNot(tok::comment) &&
+            !CurrentToken->TokenText.startswith("//")) {
+          CurrentToken->setType(TT_ImplicitStringLiteral);
+        }
+        next();
+      }
+    }
+  }
+
   void parseWarningOrError() {
     next();
     // We still want to format the whitespace left of the first token of the
@@ -1500,6 +1518,14 @@ class AnnotatingParser {
     next(); // ')'
   }
 
+  void parseHasEmbed() {
+    if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
+      return;
+    next(); // '('
+    parseEmbedDirective();
+    next(); // ')'
+  }
+
   LineType parsePreprocessorDirective() {
     bool IsFirstToken = CurrentToken->IsFirst;
     LineType Type = LT_PreprocessorDirective;
@@ -1563,6 +1589,8 @@ class AnnotatingParser {
       } else if (Tok->isOneOf(Keywords.kw___has_include,
                               Keywords.kw___has_include_next)) {
         parseHasInclude();
+      } else if (Tok->is(Keywords.kw___has_embed)) {
+        parseHasEmbed();
       }
     }
     return Type;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bb442495f58359c..05406b5d42d7380 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4302,6 +4302,12 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   if (Opts.SourceDateEpoch)
     GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch));
 
+  for (const auto &EmbedEntry : Opts.EmbedEntries)
+    GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
+
+  if (Opts.NoBuiltinPPEmbed)
+    GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
 }
@@ -4394,6 +4400,19 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     }
   }
 
+  for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) {
+    StringRef Val = A->getValue();
+    Opts.EmbedEntries.push_back(std::string(Val));
+  }
+
+  // Can disable the internal embed builtin / token
+  for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
+    StringRef Val = A->getValue();
+    if (Val == "pp_embed") {
+      Opts.NoBuiltinPPEmbed = true;
+    }
+  }
+
   // Always avoid lexing editor placeholders when we're just running the
   // preprocessor as we never want to emit the
   // "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index c2f6f41ae291efb..10558b1d34bf623 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,6 +65,34 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
+  /// Records the file named by a #embed directive as a dependency.
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+                      bool IsAngled, CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
+    if (!File) {
+      // Unresolved: record the spelled name and flag it as missing.
+      DepCollector.maybeAddDependency(FileName,
+                                      /*FromModule=*/false,
+                                      /*IsSystem=*/false,
+                                      /*IsModuleFile=*/false,
+                                      &PP.getFileManager(),
+                                      /*IsMissing=*/true);
+      return;
+    }
+    // NOTE(review): #embed presumably never enters the file through the
+    // lexer, so FileChanged would not report it; record it here instead.
+    StringRef ResolvedName =
+        llvm::sys::path::remove_leading_dotslash(File->getName());
+    DepCollector.maybeAddDependency(ResolvedName,
+                                    /*FromModule=*/false,
+                                    /*IsSystem=*/false,
+                                    /*IsModuleFile=*/false,
+                                    &PP.getFileManager(),
+                                    /*IsMissing=*/false);
+  }
+
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -81,6 +96,20 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
     // Files that actually exist are handled by FileChanged.
   }
 
+  void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
+                  OptionalFileEntryRef File) override {
+    if (!File)
+      return;
+    StringRef Filename =
+        llvm::sys::path::remove_leading_dotslash(File->getName());
+    DepCollector.maybeAddDependency(Filename,
+                                    /*FromModule=*/false,
+                                    /*IsSystem=*/false,
+                                    /*IsModuleFile=*/false,
+                                    &PP.getFileManager(),
+                                    /*IsMissing=*/false);
+  }
+
   void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
                   OptionalFileEntryRef File,
                   SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 6aad04370f6e7ad..683f751a94244ec 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -26,6 +26,14 @@ namespace DOT = llvm::DOT;
 
 namespace {
 class DependencyGraphCallback : public PPCallbacks {
+public:
+  enum DirectiveBehavior {
+    Normal = 0,
+    IgnoreEmbed = 0b01,
+    IgnoreInclude = 0b10,
+  };
+
+private:
   const Preprocessor *PP;
   std::string OutputFile;
   std::string SysRoot;
@@ -34,6 +42,7 @@ class DependencyGraphCallback : public PPCallbacks {
       llvm::DenseMap<FileEntryRef, SmallVector<FileEntryRef, 2>>;
 
   DependencyMap Dependencies;
+  DirectiveBehavior Behavior;
 
 private:
   raw_ostream &writeNodeReference(raw_ostream &OS,
@@ -42,7 +51,11 @@ class DependencyGraphCallback : public PPCallbacks {
 
 public:
+  /// \p Action selects which directive kinds are left out of the graph; it is
+  /// stored in Behavior, which the directive callbacks test before recording.
   DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
-                          StringRef SysRoot)
-    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+                          StringRef SysRoot,
+                          DirectiveBehavior Action = IgnoreEmbed)
+    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()),
+      Behavior(Action) {}
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
@@ -52,6 +62,12 @@ class DependencyGraphCallback : public PPCallbacks {
                           StringRef RelativePath, const Module *Imported,
                           SrcMgr::CharacteristicKind FileType) override;
 
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
+
   void EndOfMainFile() override {
     OutputGraphFile();
   }
@@ -70,6 +86,31 @@ void DependencyGraphCallback::InclusionDirective(
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
     SrcMgr::CharacteristicKind FileType) {
+  if ((Behavior & IgnoreInclude) == IgnoreInclude) {
+    return;
+  }
+  if (!File)
+    return;
+
+  SourceManager &SM = PP->getSourceManager();
+  OptionalFileEntryRef FromFile =
+      SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc)));
+  if (!FromFile)
+    return;
+
+  Dependencies[*FromFile].push_back(*File);
+
+  AllFiles.insert(*File);
+  AllFiles.insert(*FromFile);
+}
+
+void DependencyGraphCallback::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  if ((Behavior & IgnoreEmbed) == IgnoreEmbed) {
+    return;
+  }
   if (!File)
     return;
 
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 846e5fce6de7b2c..b7d084773b0a195 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__STDC_UTF_16__", "1");
   Builder.defineMacro("__STDC_UTF_32__", "1");
 
+  // __has_embed definitions
+  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
+  Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
+  Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
 
@@ -729,6 +734,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
   if (LangOpts.Char8)
     Builder.defineMacro("__cpp_char8_t", "202207L");
   Builder.defineMacro("__cpp_impl_destroying_delete", "201806L");
+
+  Builder.defineMacro("__cpp_pp_embed", "202403L");
 }
 
 /// InitializeOpenCLFeatureTestMacros - Define OpenCL macros based on target
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 7f5f6690682300e..fb9baa92e6836d3 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool DisableLineMarkers;
   bool DumpDefines;
   bool DumpIncludeDirectives;
+  bool DumpEmbedDirectives;
   bool UseLineDirectives;
   bool IsFirstFileEntered;
   bool MinimizeWhitespace;
@@ -106,12 +107,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
 public:
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
-                           bool defines, bool DumpIncludeDirectives,
+                           bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
                            bool UseLineDirectives, bool MinimizeWhitespace,
                            bool DirectivesOnly, bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
+        DumpEmbedDirectives(DumpEmbedDirectives),
         UseLineDirectives(UseLineDirectives),
         MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
         KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
@@ -149,6 +151,11 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
                    FileID PrevFID) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -398,6 +405,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
   }
 }
 
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  // In -dE mode, dump #embed directives prior to dumping their content or
+  // interpretation.
+  if (DumpEmbedDirectives) {
+    MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+    *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+       << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+    setEmittedDirectiveOnThisLine();
+  }
+}
+
 void PrintPPOutputPPCallbacks::InclusionDirective(
     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -981,7 +1002,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
       Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 28f7b0b9edfc5c2..4a73946951fd9c2 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -71,6 +71,11 @@ class InclusionRewriter : public PPCallbacks {
                    FileID PrevFID) override;
   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -177,6 +182,14 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
   LastInclusionLocation = SourceLocation();
 }
 
+/// This should be called whenever the preprocessor encounters embed
+/// directives.
+void InclusionRewriter::EmbedDirective(
+    SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/,
+    CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/,
+    OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/,
+    StringRef /*RelativePath*/) {}
+
 /// This should be called whenever the preprocessor encounters include
 /// directives. It does not say whether the file has been included, but it
 /// provides more information about the directive (hash location instead
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
index f2b60a728e90178..ea5dce2c27a587c 100644
--- a/clang/lib/Lex/PPCallbacks.cpp
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -14,16 +14,5 @@ using namespace clang;
 // Out of line key method.
 PPCallbacks::~PPCallbacks() = default;
 
-void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
-                             bool IsAngled, OptionalFileEntryRef File,
-                             SrcMgr::CharacteristicKind FileType) {}
-
 // Out of line key method.
 PPChainedCallbacks::~PPChainedCallbacks() = default;
-
-void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
-                                    bool IsAngled, OptionalFileEntryRef File,
-                                    SrcMgr::CharacteristicKind FileType) {
-  First->HasInclude(Loc, FileName, IsAngled, File, FileType);
-  Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
-}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e3065c17dc70b43..e0d98d7ca03fa11 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -18,7 +18,9 @@
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/FrontendOptions.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LexDiagnostic.h"
@@ -1079,6 +1081,107 @@ OptionalFileEntryRef Preprocessor::LookupFile(
   return std::nullopt;
 }
 
+/// Resolves \p Filename for a #embed directive.
+///
+/// An absolute path is looked up as-is. Otherwise a quoted name is tried
+/// relative to \p SearchPath (or, without one, the directory of
+/// \p LookupFromFile) and then relative to the working directory; quoted and
+/// angled names alike then fall back to each PPOpts->EmbedEntries directory.
+/// \returns the first file found, or std::nullopt if every lookup failed.
+OptionalFileEntryRef Preprocessor::LookupEmbedFile(
+    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+    bool OpenFile, const FileEntry *LookupFromFile,
+    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath) {
+  FileManager &FM = this->getFileManager();
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // lookup path or immediately fail
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(Filename, true, OpenFile);
+    return llvm::expectedToOptional(std::move(ShouldBeEntry));
+  }
+
+  // Otherwise, it's search time!
+  SmallString<512> LookupPath;
+  // Non-angled lookup
+  if (!isAngled) {
+    bool TryLocalLookup = false;
+    if (SearchPath) {
+      // use the provided search path as the local lookup path
+      llvm::sys::path::native(*SearchPath, LookupPath);
+      TryLocalLookup = true;
+    } else if (LookupFromFile) {
+      // Use file-based lookup here
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty()) {
+        llvm::sys::path::native(FullFileDir, LookupPath);
+        llvm::sys::path::remove_filename(LookupPath);
+        TryLocalLookup = true;
+      }
+    } else {
+      // Cannot do local lookup: give up.
+      TryLocalLookup = false;
+    }
+    if (TryLocalLookup) {
+      if (!LookupPath.empty() &&
+          !llvm::sys::path::is_separator(LookupPath.back())) {
+        LookupPath.append(llvm::sys::path::get_separator());
+      }
+      LookupPath.append(Filename);
+      llvm::Expected<FileEntryRef> ShouldBeEntry =
+          FM.getFileRef(LookupPath, true, OpenFile);
+      if (ShouldBeEntry) {
+        return std::move(*ShouldBeEntry);
+      } else {
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  if (!isAngled) {
+    // do working directory lookup
+    LookupPath.clear();
+    auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
+    if (MaybeWorkingDirEntry) {
+      DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
+      StringRef WorkingDir = WorkingDirEntry.getName();
+      if (!WorkingDir.empty()) {
+        llvm::sys::path::native(WorkingDir, LookupPath);
+        if (!LookupPath.empty() &&
+            !llvm::sys::path::is_separator(LookupPath.back())) {
+          LookupPath.append(llvm::sys::path::get_separator());
+        }
+        LookupPath.append(Filename);
+        llvm::Expected<FileEntryRef> ShouldBeEntry =
+            FM.getFileRef(LookupPath, true, OpenFile);
+        if (ShouldBeEntry) {
+          return std::move(*ShouldBeEntry);
+        } else {
+          llvm::consumeError(ShouldBeEntry.takeError());
+        }
+      }
+    }
+  }
+
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    llvm::sys::path::native(Entry, LookupPath);
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back())) {
+      LookupPath.append(llvm::sys::path::get_separator());
+    }
+    LookupPath.append(Filename.begin(), Filename.end());
+    llvm::sys::path::native(LookupPath);
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(LookupPath, true, OpenFile);
+    if (ShouldBeEntry) {
+      return std::move(*ShouldBeEntry);
+    } else {
+      llvm::consumeError(ShouldBeEntry.takeError());
+    }
+  }
+  return std::nullopt;
+}
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
@@ -1174,6 +1271,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp_include_next:
       case tok::pp___include_macros:
       case tok::pp_pragma:
+      case tok::pp_embed:
         Diag(Result, diag::err_embedded_directive) << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
@@ -1288,6 +1386,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleIdentSCCSDirective(Result);
     case tok::pp_sccs:
       return HandleIdentSCCSDirective(Result);
+    case tok::pp_embed:
+      return HandleEmbedDirective(SavedHash.getLocation(), Result,
+                                  getCurrentFileLexer()
+                                      ? getCurrentFileLexer()->getFileEntry()
+                                      : nullptr);
     case tok::pp_assert:
       //isExtension = true;  // FIXME: implement #assert
       break;
@@ -3517,3 +3620,400 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
 }
+
+enum class BracketType { Brace, Paren, Square };
+
+/// Lexes the parameter list of #embed (or of __has_embed when \p InHasEmbed)
+/// up to the closing token. Result.Successful is set only once the whole list
+/// was consumed; unknown parameters are counted and optionally diagnosed.
+Preprocessor::LexEmbedParametersResult
+Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
+                                 bool DiagnoseUnknown) {
+  LexEmbedParametersResult Result{};
+  SmallString<32> Parameter;
+  SmallVector<Token, 2> ParameterTokens;
+  tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
+  Result.StartLoc = CurTok.getLocation();
+  for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
+    Parameter.clear();
+    ParameterTokens.clear(); // drop tokens left by an unknown parameter
+    // Lex identifier [:: identifier ...]
+    if (!CurTok.is(tok::identifier)) {
+      Diag(CurTok, diag::err_expected) << "identifier";
+      DiscardUntilEndOfDirective();
+      return Result;
+    }
+    Token ParameterStartTok = CurTok;
+    IdentifierInfo *InitialID = CurTok.getIdentifierInfo();
+    Parameter.append(InitialID->getName());
+    for (LexNonComment(CurTok); CurTok.is(tok::coloncolon);
+         LexNonComment(CurTok)) {
+      Parameter.append("::");
+      LexNonComment(CurTok);
+      if (!CurTok.is(tok::identifier)) {
+        Diag(CurTok, diag::err_expected) << "identifier";
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *NextID = CurTok.getIdentifierInfo();
+      Parameter.append(NextID->getName());
+    }
+    // Lex the parameters (dependent on the parameter type we want!)
+    if (Parameter == "limit") {
+      // we have a limit parameter and its internals are processed using
+      // evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult LimitEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!LimitEvalResult.Value)
+        return Result;
+      const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
+      const bool ValueDoesNotFit =
+          LimitResult.getBitWidth() > 64
+              ? true
+              : (LimitResult.isUnsigned() ||
+                 (LimitResult.isSigned() && LimitResult.isNegative()));
+      if (ValueDoesNotFit) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getRawData()[0]);
+      } else {
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else if (Parameter == "clang::offset") {
+      // we have a clang::offset parameter and its internals are processed
+      // using evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult OffsetEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!OffsetEvalResult.Value)
+        return Result;
+      const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+      if (OffsetResult.getBitWidth() > 64) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getRawData()[0]);
+      } else {
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else {
+      if (CurTok.is(tok::l_paren)) {
+        SmallVector<BracketType, 4> Brackets;
+        Brackets.push_back(BracketType::Paren);
+        auto ParseArgToken = [&]() {
+          for (LexNonComment(CurTok); CurTok.isNot(tok::eod);
+               LexNonComment(CurTok)) {
+            switch (CurTok.getKind()) {
+            default:
+              break;
+            case tok::l_paren:
+              Brackets.push_back(BracketType::Paren);
+              break;
+            case tok::r_paren:
+              if (Brackets.back() != BracketType::Paren) {
+                Diag(CurTok, diag::err_pp_expected_rparen);
+                return false;
+              }
+              Brackets.pop_back();
+              if (Brackets.empty())
+                return true;
+              break;
+            case tok::l_brace:
+              Brackets.push_back(BracketType::Brace);
+              break;
+            case tok::r_brace:
+              if (Brackets.back() != BracketType::Brace) {
+                Diag(CurTok, diag::err_expected) << "}";
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            case tok::l_square:
+              Brackets.push_back(BracketType::Square);
+              break;
+            case tok::r_square:
+              if (Brackets.back() != BracketType::Square) {
+                Diag(CurTok, diag::err_expected) << "]";
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            }
+            ParameterTokens.push_back(CurTok);
+          }
+          if (!Brackets.empty()) {
+            Diag(CurTok, diag::err_pp_expected_rparen);
+            DiscardUntilEndOfDirective();
+            return false;
+          }
+          return true;
+        };
+        if (!ParseArgToken())
+          return Result;
+        if (!CurTok.is(tok::r_paren)) {
+          Diag(CurTok, diag::err_pp_expected_rparen);
+          DiscardUntilEndOfDirective();
+          return Result;
+        }
+        Lex(CurTok);
+      }
+      // "Token-soup" parameters
+      if (Parameter == "if_empty") {
+        // TODO: integer list optimization
+        Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+      } else if (Parameter == "prefix") {
+        // TODO: integer list optimization
+        Result.MaybePrefixParam = std::move(ParameterTokens);
+      } else if (Parameter == "suffix") {
+        // TODO: integer list optimization
+        Result.MaybeSuffixParam = std::move(ParameterTokens);
+      } else {
+        ++Result.UnrecognizedParams;
+        if (DiagnoseUnknown) {
+          Diag(ParameterStartTok, diag::warn_pp_unknown_parameter_ignored)
+              << 1 << Parameter;
+        }
+      }
+    }
+  }
+  Result.Successful = true;
+  return Result;
+}
+
+// This array must survive for an extended period of time
+inline constexpr const char *IntegerLiterals[] = {
+    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "10",
+    "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",  "20",  "21",
+    "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",  "30",  "31",  "32",
+    "33",  "34",  "35",  "36",  "37",  "38",  "39",  "40",  "41",  "42",  "43",
+    "44",  "45",  "46",  "47",  "48",  "49",  "50",  "51",  "52",  "53",  "54",
+    "55",  "56",  "57",  "58",  "59",  "60",  "61",  "62",  "63",  "64",  "65",
+    "66",  "67",  "68",  "69",  "70",  "71",  "72",  "73",  "74",  "75",  "76",
+    "77",  "78",  "79",  "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",
+    "88",  "89",  "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",
+    "99",  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
+    "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120",
+    "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131",
+    "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142",
+    "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153",
+    "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164",
+    "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175",
+    "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186",
+    "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197",
+    "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208",
+    "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
+    "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230",
+    "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241",
+    "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
+    "253", "254", "255"};
+
+void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
+                                              LexEmbedParametersResult &Params,
+                                              StringRef BinaryContents,
+                                              const size_t TargetCharWidth) {
+  (void)TargetCharWidth; // for later, when we support various sizes
+  size_t TokenIndex = 0;
+  const size_t InitListTokensSize = [&]() {
+    if (BinaryContents.empty()) {
+      if (Params.MaybeIfEmptyParam) {
+        return Params.MaybeIfEmptyParam->size();
+      } else {
+        return static_cast<size_t>(0);
+      }
+    } else {
+      return static_cast<size_t>(
+          (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) +
+          (BinaryContents.size() * 2 - 1) +
+          (Params.MaybeSuffixParam ? Params.MaybeSuffixParam->size() : 0));
+    }
+  }();
+  std::unique_ptr<Token[]> InitListTokens(new Token[InitListTokensSize]());
+
+  if (BinaryContents.empty()) {
+    if (Params.MaybeIfEmptyParam) {
+      std::copy(Params.MaybeIfEmptyParam->begin(),
+                Params.MaybeIfEmptyParam->end(), InitListTokens.get());
+      TokenIndex += Params.MaybeIfEmptyParam->size();
+      assert(TokenIndex == InitListTokensSize);
+      EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true,
+                       true);
+    }
+    return;
+  }
+
+  // FIXME: this does not take the target's byte size into account;
+  // will fail on many DSPs and embedded machines!
+  if (Params.MaybePrefixParam) {
+    std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(),
+              InitListTokens.get() + TokenIndex);
+    TokenIndex += Params.MaybePrefixParam->size();
+  }
+  for (size_t I = 0; I < BinaryContents.size(); ++I) {
+    unsigned char ByteValue = BinaryContents[I];
+    StringRef ByteRepresentation = IntegerLiterals[ByteValue];
+    const size_t InitListIndex = TokenIndex;
+    Token &IntToken = InitListTokens[InitListIndex];
+    IntToken.setKind(tok::numeric_constant);
+    IntToken.setLiteralData(ByteRepresentation.data());
+    IntToken.setLength(ByteRepresentation.size());
+    IntToken.setLocation(FilenameLoc);
+    ++TokenIndex;
+    bool AtEndOfContents = I == (BinaryContents.size() - 1);
+    if (!AtEndOfContents) {
+      const size_t CommaInitListIndex = InitListIndex + 1;
+      Token &CommaToken = InitListTokens[CommaInitListIndex];
+      CommaToken.setKind(tok::comma);
+      CommaToken.setLocation(FilenameLoc);
+      ++TokenIndex;
+    }
+  }
+  if (Params.MaybeSuffixParam) {
+    std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(),
+              InitListTokens.get() + TokenIndex);
+    TokenIndex += Params.MaybeSuffixParam->size();
+  }
+  assert(TokenIndex == InitListTokensSize);
+  EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false);
+}
+
+void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
+                                               LexEmbedParametersResult &Params,
+                                               StringRef BinaryContents,
+                                               const size_t TargetCharWidth) {
+  // TODO: add direct built-in support; for now forward to the naive expansion.
+  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                             TargetCharWidth);
+}
+
+/// Handle a '#embed' directive: lex the resource name and parameters, find
+/// and read the resource (passing 'limit' to the read, then skipping 'offset'
+/// bytes), notify PPCallbacks, and expand via the naive or builtin handler.
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // Diagnose '#embed' only when the active language mode predates it: C++
+  // modes are checked against C++26 and C modes against C23.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23) {
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                          : diag::warn_c23_pp_embed);
+    Diag(EmbedTok, EitherDiag);
+  }
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  LexEmbedParametersResult Params = LexEmbedParameters(
+      CurTok, /*InHasEmbed=*/false, /*DiagnoseUnknown=*/true);
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  SmallString<512> SearchPath;
+  SmallString<512> RelativePath;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  assert(!Filename.empty());
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, &SearchPath, &RelativePath);
+  if (!MaybeFileRef) {
+    // Skip the diagnostic when EmbedFileNotFound returns true.
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename))
+      return;
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+  // A limit that does not fit in int64_t is left unset.
+  std::optional<int64_t> MaybeSignedLimit{};
+  if (Params.MaybeLimitParam &&
+      static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam)
+    MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
+      getFileManager().getBufferForFile(*MaybeFileRef, false, false,
+                                        MaybeSignedLimit);
+  if (!MaybeFile) {
+    // the file was found but a buffer for its contents could not be obtained
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile.get()->getBuffer();
+  if (Params.MaybeOffsetParam) {
+    // offsets all the way to the end of the file make for an empty file.
+    const size_t OffsetParam = *Params.MaybeOffsetParam;
+    BinaryContents = BinaryContents.substr(OffsetParam);
+  }
+  const size_t TargetCharWidth = getTargetInfo().getCharWidth();
+  if (TargetCharWidth > 64) {
+    // Too wide for us to handle
+    Diag(EmbedTok, diag::err_pp_unsupported_directive) << 1
+        << "CHAR_BIT is too wide for the target architecture to handle "
+           "properly";
+    return;
+  }
+  if (TargetCharWidth != 8) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive) << 1
+        << "At the moment, we do not have the machinery to support non 8-bit "
+           "CHAR_BIT targets!";
+    return;
+  }
+  if (CHAR_BIT % TargetCharWidth != 0) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive) << 1
+        << "CHAR_BIT is not evenly divisible by host architecture's byte "
+           "definition";
+    return;
+  }
+  if (Callbacks) {
+    CharSourceRange FilenameSourceRange(
+        SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true);
+    CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc),
+                                    true);
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange,
+                              ParametersRange, MaybeFileRef, SearchPath,
+                              RelativePath);
+  }
+  if (PPOpts->NoBuiltinPPEmbed) {
+    HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                              TargetCharWidth);
+  } else {
+    // emit a token directly, handle it internally.
+    HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+                                TargetCharWidth);
+  }
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 269984aae07bf28..dda5717afc699da 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -868,7 +868,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
 /// may occur after a #if or #elif directive.  If the expression is equivalent
 /// to "!defined(X)" return X in IfNDefMacro.
 Preprocessor::DirectiveEvalResult
-Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                          Token &Tok, bool CheckForEoD,
+                                          bool Parenthesized) {
   SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -880,7 +882,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
   DisableMacroExpansion = false;
 
   // Peek ahead one token.
-  Token Tok;
   LexNonComment(Tok);
 
   // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
@@ -901,7 +902,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
     // We cannot trust the source range from the value because there was a
     // parse error. Track the range manually -- the end of the directive is the
     // end of the condition range.
-    return {false,
+    return {std::nullopt,
+            false,
             DT.IncludedUndefinedIds,
             {ExprStartLoc, ConditionRange.getEnd()}};
   }
@@ -917,7 +919,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+    const bool IsNonZero = ResVal.Val != 0;
+    const SourceRange ValRange = ResVal.getRange();
+    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+            ValRange};
   }
 
   // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
@@ -930,17 +935,34 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+    // This is a failure path: report no value and a false condition instead
+    // of a result derived from the partially evaluated ResVal.
+    return {std::nullopt, false, DT.IncludedUndefinedIds,
+            ResVal.getRange()};
   }
 
-  // If we aren't at the tok::eod token, something bad happened, like an extra
-  // ')' token.
-  if (Tok.isNot(tok::eod)) {
-    Diag(Tok, diag::err_pp_expected_eol);
-    DiscardUntilEndOfDirective();
+  if (CheckForEoD) {
+    // If we aren't at the tok::eod token, something bad happened, like an extra
+    // ')' token.
+    if (Tok.isNot(tok::eod)) {
+      Diag(Tok, diag::err_pp_expected_eol);
+      DiscardUntilEndOfDirective();
+    }
   }
 
   // Restore 'DisableMacroExpansion'.
   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-  return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+  const bool IsNonZero = ResVal.Val != 0;
+  const SourceRange ValRange = ResVal.getRange();
+  return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// Convenience overload of EvaluateDirectiveExpression that supplies a local
+/// Token and forwards IfNDefMacro, CheckForEoD and Parenthesized unchanged to
+/// the overload taking a Token reference.
+Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(
+    IdentifierInfo *&IfNDefMacro, bool CheckForEoD, bool Parenthesized) {
+  Token Tok;
+  return EvaluateDirectiveExpression(IfNDefMacro, Tok, CheckForEoD,
+                                     Parenthesized);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b371f8cf7a9c072..6e0163ccc89b7fb 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
     Ident__has_c_attribute = nullptr;
 
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+  Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
   Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning      = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1264,6 +1265,114 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
   return File.has_value();
 }
 
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+/// Returns VALUE__STDC_EMBED_FOUND__ when the resource exists and would yield
+/// data, VALUE__STDC_EMBED_EMPTY__ when it exists but would yield none, and
+/// VALUE__STDC_EMBED_NOT_FOUND__ otherwise (including on any parse error).
+int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // pedwarn when the active language mode predates __has_embed: C++ modes are
+  // checked against C++26 and C modes against C23.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23) {
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+                                          : diag::warn_c23_pp_has_embed);
+    Diag(Tok, EitherDiag);
+  }
+
+  // Save the location of the current token.  If a '(' is later found, use
+  // that location.  If not, use the end of this location instead.
+  SourceLocation LParenLoc = Tok.getLocation();
+
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(LParenLoc, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+    assert(Tok.is(tok::identifier));
+    Tok.setIdentifierInfo(II);
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  // Get '('. If we don't have a '(', try to form a header-name token.
+  do {
+    if (this->LexHeaderName(Tok))
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+  } while (Tok.getKind() == tok::comment);
+
+  // Ensure we have a '('.
+  if (Tok.isNot(tok::l_paren)) {
+    // No '(', use end of last token.
+    LParenLoc = this->getLocForEndOfToken(LParenLoc);
+    this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
+    // If the next token looks like a filename or the start of one,
+    // assume it is and process it as such.
+    if (Tok.isNot(tok::header_name))
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+  } else {
+    // Save '(' location for possible missing ')' message.
+    LParenLoc = Tok.getLocation();
+    if (this->LexHeaderName(Tok))
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  if (Tok.isNot(tok::header_name)) {
+    Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  SourceLocation FilenameLoc = Tok.getLocation();
+  Token FilenameTok = Tok;
+
+  Preprocessor::LexEmbedParametersResult Params =
+      this->LexEmbedParameters(Tok, true, false);
+  if (!Params.Successful) {
+    if (Tok.isNot(tok::eod))
+      this->DiscardUntilEndOfDirective();
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+  // Any parameter that was not recognized makes the result "not found".
+  if (Params.UnrecognizedParams > 0)
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+
+  if (!Tok.is(tok::r_paren)) {
+    Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+        << II << tok::r_paren;
+    Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+    DiscardUntilEndOfDirective();
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  SmallString<128> FilenameBuffer;
+  SmallString<256> RelativePath;
+  StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
+  bool isAngled =
+      this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  assert(!Filename.empty());
+  const FileEntry *LookupFromFile =
+      this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
+                                  : nullptr;
+  OptionalFileEntryRef MaybeFileEntry =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, nullptr, &RelativePath);
+  // Report the lookup result, found or not, to the callbacks.
+  if (Callbacks)
+    Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+  if (!MaybeFileEntry)
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+
+  // Mirror what '#embed' does with the data: the limit caps how much of the
+  // file is considered, and the offset is then applied to that capped range,
+  // so an offset at or past the cap leaves nothing to embed.
+  size_t FileSize = MaybeFileEntry->getSize();
+  if (Params.MaybeLimitParam && *Params.MaybeLimitParam < FileSize)
+    FileSize = static_cast<size_t>(*Params.MaybeLimitParam);
+  if (FileSize == 0)
+    return VALUE__STDC_EMBED_EMPTY__;
+  if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize)
+    return VALUE__STDC_EMBED_EMPTY__;
+  return VALUE__STDC_EMBED_FOUND__;
+}
+
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
   return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
 }
@@ -1801,6 +1910,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
       return;
     OS << (int)Value;
     Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_embed) {
+    // The argument to this builtin should be a parenthesized
+    // file name string literal using angle brackets (<>) or
+    // double-quotes (""), optionally followed by a series of
+    // embed parameters written in a form similar to attributes.
+    int Value = EvaluateHasEmbed(Tok, II);
+
+    if (Tok.isNot(tok::r_paren))
+      return;
+    OS << Value;
+    Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.
     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 000000000000000..93d177a48c83ab8
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk
\ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 000000000000000..1ce9ab967e4a154
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+           __  _
+       .-.'  `; `-._  __  _
+      (_,         .-:'  `; `-._
+    ,'o"(        (_,           )
+   (__,-'      ,'o"(            )>
+      (       (__,-'            )
+       `-'._.--._(             )
+          |||  |||`-'._.--._.-'
+                     |||  |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 000000000000000..63d8dbd40c23542
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b
\ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 000000000000000..80980e753614a5d
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed("media/art.txt")
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk")
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1))
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1))
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD"))
+#error 6
+#elif !__has_embed(__FILE__ limit(2) prefix(y))
+#error 7
+#elif !__has_embed(__FILE__ limit(2))
+#error 8
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x))
+#error 9
+#elif __has_embed(<media/empty>) != 2
+#error 10
+#elif __has_embed(<media/empty> limit(0)) != 2
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != 2
+#error 12
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(1)) != 2
+#error 13
+#elif !__has_embed(<media/art.txt>)
+#error 14
+#elif !__has_embed(<media/art.txt> if_empty(meow))
+#error 15
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 000000000000000..fe0edb00e609837
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed(__FILE__)
+#error 2
+#elif !__has_embed(__FILE__ suffix(x))
+#error 3
+#elif !__has_embed(__FILE__ suffix(x) limit(1))
+#error 4
+#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1))
+#error 5
+#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1))
+#error 6
+#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1))
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 000000000000000..46787041ca23bec
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+
+#if defined(__cplusplus)
+#if !defined(__cpp_pp_embed) || __cpp_pp_embed != 202403L
+#error 1
+#endif
+#endif
+
+#if !defined(__has_embed)
+#error 2
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c
new file mode 100644
index 000000000000000..337fa4ac067ec71
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed <nfejfNejAKFe>
+// expected-error at -1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 000000000000000..cd517b7f216ac32
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+typedef struct kitty {
+	int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+	int here;
+	kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+_Static_assert(meow == 'b', "");
+_Static_assert(kit.purr == 'b', "");
+_Static_assert(kit_kit.here == 'j', "");
+_Static_assert(kit_kit.kit.purr == 'k', "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 000000000000000..ac1a768b27ffff9
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+_Static_assert(sizeof(data) == 3, "");
+_Static_assert(123 == data[0], "");
+_Static_assert(124 == data[1], "");
+_Static_assert(125 == data[2], "");
+_Static_assert(sizeof(non_empty_data) == 2, "");
+_Static_assert('j' == non_empty_data[0], "");
+_Static_assert('k' == non_empty_data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 000000000000000..28a94fe9430f033
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char limit_data[] = {
+#embed <jk.txt> limit(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(limit_data) == 1, "");
+_Static_assert('j' == limit_data[0], "");
+_Static_assert(limit_data[0] == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 000000000000000..71a029544dca556
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(offset_data) == 1, "");
+_Static_assert('k' == offset_data[0], "");
+_Static_assert(offset_data[0] == data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 000000000000000..5182a2b874d3991
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('\xA' == data[0], "");
+_Static_assert('b' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 000000000000000..11c3f2bbbfb2bb6
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('b' == data[0], "");
+_Static_assert('\xA' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 000000000000000..1f043ccd2ff54bf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed __FILE__ unrecognized
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized' ignored}}
+#embed __FILE__ unrecognized::param
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 000000000000000..5c33871c0c8a4d8
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('b' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 000000000000000..791cd9176ebe0ab
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('a' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 000000000000000..2e65efe2a145dda
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
\ No newline at end of file
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 103c08ffbe83b38..8f9d7c77ccd150f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -777,6 +777,13 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
     "Semicolon-separated list of components to include in libLLVM, or \"all\".")
 endif()
 
+option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON)
+# When the option is OFF, add the two *_NO_WARNINGS compile definitions below.
+if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
+  add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
+endif()
+
+
 if(MSVC)
   option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
   # Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 1be13bc01ab9b25..828227f2f25a2f0 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
 # Invokes config.guess
 
 function( get_host_triple var )
-  if( MSVC )
+  if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
     if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
       set( value "aarch64-pc-windows-msvc" )
     elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
     else()
       set( value "powerpc-ibm-aix" )
     endif()
-  else( MSVC )
+  else()
     if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
       message(WARNING "unable to determine host target triple")
     else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
       endif( NOT TT_RV EQUAL 0 )
       set( value ${TT_OUT} )
     endif()
-  endif( MSVC )
+  endif()
   set( ${var} ${value} PARENT_SCOPE )
 endfunction( get_host_triple var )

>From 6a7a4c959f1635f5c3549010d277b5834a3e3fe2 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Sun, 8 Oct 2023 17:43:51 -0400
Subject: [PATCH 02/50] =?UTF-8?q?=E2=9C=A8=20Speedy=20#embed=20implementat?=
 =?UTF-8?q?ion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

⚡ [Lex] Better reservations for improved performance/memory usage.

🛠 [Lex, Frontend] Remove comma hardcoding since we are servicing a full file

apply suggestions from git-clang-format
---
 clang/include/clang/AST/Expr.h                |  51 ++
 clang/include/clang/AST/RecursiveASTVisitor.h |   1 +
 .../clang/Basic/DiagnosticCommonKinds.td      |   6 +
 clang/include/clang/Basic/FileManager.h       |   5 +-
 clang/include/clang/Basic/StmtNodes.td        |   1 +
 clang/include/clang/Basic/TokenKinds.def      |   6 +-
 .../Frontend/PreprocessorOutputOptions.h      |   3 +-
 .../include/clang/Lex/PPDirectiveParameter.h  |  32 ++
 clang/include/clang/Lex/PPEmbedParameters.h   |  78 ++++
 clang/include/clang/Lex/Preprocessor.h        |  42 +-
 clang/include/clang/Sema/Sema.h               |  37 ++
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/AST/Expr.cpp                        |  16 +
 clang/lib/AST/ExprClassification.cpp          |   5 +
 clang/lib/AST/ExprConstant.cpp                |   8 +
 clang/lib/AST/ItaniumMangle.cpp               |   1 +
 clang/lib/AST/StmtPrinter.cpp                 |   7 +
 clang/lib/AST/StmtProfile.cpp                 |   2 +
 clang/lib/Basic/FileManager.cpp               |   1 -
 clang/lib/Basic/IdentifierTable.cpp           |   6 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   3 +-
 clang/lib/Format/TokenAnnotator.cpp           |   3 +-
 clang/lib/Frontend/DependencyFile.cpp         |  15 +-
 clang/lib/Frontend/DependencyGraph.cpp        |   2 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  14 +-
 clang/lib/Interpreter/Interpreter.cpp         |   1 +
 clang/lib/Lex/Lexer.cpp                       |   8 +
 clang/lib/Lex/PPDirectives.cpp                | 434 ++++++++++++++----
 clang/lib/Lex/PPMacroExpansion.cpp            |  23 +-
 clang/lib/Lex/Preprocessor.cpp                |   6 +-
 clang/lib/Parse/ParseExpr.cpp                 | 104 +++++
 clang/lib/Parse/ParseTemplate.cpp             |   2 +
 clang/lib/Sema/SemaDecl.cpp                   |  48 ++
 clang/lib/Sema/SemaDeclCXX.cpp                |   3 +-
 clang/lib/Sema/SemaExceptionSpec.cpp          |   1 +
 clang/lib/Sema/SemaExpr.cpp                   | 239 +++++++++-
 clang/lib/Sema/SemaTemplate.cpp               |  56 +++
 clang/lib/Sema/TreeTransform.h                |   6 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |  13 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |  10 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |   4 +
 clang/test/Preprocessor/embed_art.c           | 106 +++++
 clang/test/Preprocessor/embed_single_entity.c |   7 +
 clang/test/Preprocessor/embed_weird.cpp       |  68 +++
 llvm/include/llvm/Support/Base64.h            |  36 +-
 45 files changed, 1351 insertions(+), 172 deletions(-)
 create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h
 create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h
 create mode 100644 clang/test/Preprocessor/embed_art.c
 create mode 100644 clang/test/Preprocessor/embed_single_entity.c
 create mode 100644 clang/test/Preprocessor/embed_weird.cpp

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index b69c616b0090365..d3fba205c91c934 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4805,6 +4805,57 @@ class SourceLocExpr final : public Expr {
   friend class ASTStmtReader;
 };
 
+/// Represents a function call to __builtin_pp_embed().
+class PPEmbedExpr final : public Expr {
+  SourceLocation BuiltinLoc, RParenLoc;
+  DeclContext *ParentContext = nullptr;
+  StringLiteral *Filename = nullptr;
+  StringLiteral *BinaryData = nullptr;
+
+public:
+  enum Action {
+    NotFound,
+    FoundOne,
+    Expanded,
+  };
+
+  PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename,
+              StringLiteral *BinaryData, SourceLocation BLoc,
+              SourceLocation RParenLoc, DeclContext *Context);
+
+  /// Build an empty expression (pointer members start out null).
+  explicit PPEmbedExpr(EmptyShell Empty) : Expr(PPEmbedExprClass, Empty) {}
+
+  /// Return the declaration context stored in this expression; it is null
+  /// for an expression built from an EmptyShell until a value is assigned.
+  const DeclContext *getParentContext() const { return ParentContext; }
+  DeclContext *getParentContext() { return ParentContext; }
+
+  SourceLocation getLocation() const { return BuiltinLoc; }
+  SourceLocation getBeginLoc() const { return BuiltinLoc; }
+  SourceLocation getEndLoc() const { return RParenLoc; }
+
+  StringLiteral *getFilenameStringLiteral() const { return Filename; }
+  StringLiteral *getDataStringLiteral() const { return BinaryData; }
+
+  size_t getDataElementCount(ASTContext &Context) const;
+
+  child_range children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+
+  const_child_range children() const {
+    return const_child_range(child_iterator(), child_iterator());
+  }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == PPEmbedExprClass;
+  }
+
+private:
+  friend class ASTStmtReader;
+};
+
 /// Describes an C or C++ initializer list.
 ///
 /// InitListExpr describes an initializer list, which can be used to
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3dd23eb38eeabfc..6b7211bb0a0d3f1 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2809,6 +2809,7 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
 DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
 DEF_TRAVERSE_STMT(StmtExpr, {})
 DEF_TRAVERSE_STMT(SourceLocExpr, {})
+DEF_TRAVERSE_STMT(PPEmbedExpr, {})
 
 DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
   TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index f2df283c74829f6..4df86e35eebde38 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -59,6 +59,9 @@ def err_expected_string_literal : Error<"expected string literal "
           "'external_source_symbol' attribute|"
           "as argument of '%1' attribute}0">;
 
+def err_builtin_pp_embed_invalid_argument : Error<
+  "invalid argument to '__builtin_pp_embed': %0">;
+
 def err_invalid_string_udl : Error<
   "string literal with user-defined suffix cannot be used here">;
 def err_invalid_character_udl : Error<
@@ -80,6 +83,9 @@ def err_expected : Error<"expected %0">;
 def err_expected_either : Error<"expected %0 or %1">;
 def err_expected_after : Error<"expected %1 after %0">;
 
+def err_builtin_pp_embed_invalid_location : Error<
+  "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">;
+
 def err_param_redefinition : Error<"redefinition of parameter %0">;
 def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">;
 def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">,
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index c757f8775b425e9..cbfcb292778e5f7 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -282,8 +282,9 @@ class FileManager : public RefCountedBase<FileManager> {
   getBufferForFile(StringRef Filename, bool isVolatile = false,
                    bool RequiresNullTerminator = true,
                    std::optional<int64_t> MaybeLimit = std::nullopt) {
-    return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
-                                RequiresNullTerminator);
+    return getBufferForFileImpl(Filename,
+                                /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1),
+                                isVolatile, RequiresNullTerminator);
   }
 
 private:
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index cec301dfca2817b..e3be997dd1c86e0 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -203,6 +203,7 @@ def OpaqueValueExpr : StmtNode<Expr>;
 def TypoExpr : StmtNode<Expr>;
 def RecoveryExpr : StmtNode<Expr>;
 def BuiltinBitCastExpr : StmtNode<ExplicitCastExpr>;
+def PPEmbedExpr : StmtNode<Expr>;
 
 // Microsoft Extensions.
 def MSPropertyRefExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 19a66fbb0731194..167bd614efe7bd9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -154,10 +154,6 @@ TOK(eod)                 // End of preprocessing directive (end of line inside a
                          // directive).
 TOK(code_completion)     // Code completion marker
 
-// #embed speed support
-TOK(builtin_embed)
-
-
 // C99 6.4.9: Comments.
 TOK(comment)             // Comment (only in -E -C[C] mode)
 
@@ -758,6 +754,7 @@ ALIAS("__char32_t"   , char32_t          , KEYCXX)
 KEYWORD(__builtin_bit_cast               , KEYALL)
 KEYWORD(__builtin_available              , KEYALL)
 KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
+KEYWORD(__builtin_pp_embed               , KEYALL)
 
 // Keywords defined by Attr.td.
 #ifndef KEYWORD_ATTRIBUTE
@@ -993,6 +990,7 @@ ANNOTATION(repl_input_end)
 #undef CXX11_KEYWORD
 #undef KEYWORD
 #undef PUNCTUATOR
+#undef BUILTINOK
 #undef TOK
 #undef C99_KEYWORD
 #undef C23_KEYWORD
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 3e36db3f8ce46ea..0bc32c65a58d2d8 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,7 +22,8 @@ class PreprocessorOutputOptions {
   unsigned ShowMacroComments : 1;  ///< Show comments, even in macros.
   unsigned ShowMacros : 1;         ///< Print macro definitions.
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
-  unsigned ShowEmbedDirectives : 1;  ///< Print embeds, etc. within preprocessed output.
+  unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed
+                                    ///< output.
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
   unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h
new file mode 100644
index 000000000000000..fc413c345adc539
--- /dev/null
+++ b/clang/include/clang/Lex/PPDirectiveParameter.h
@@ -0,0 +1,32 @@
+//===--- PPDirectiveParameter.h - Directive parameter info ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PPDirectiveParameter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+
+/// Captures basic information about a preprocessor directive parameter.
+class PPDirectiveParameter {
+public:
+  SourceLocation Start; ///< Location at which the parameter begins.
+  SourceLocation End;   ///< Location at which the parameter ends.
+
+  PPDirectiveParameter(SourceLocation Start, SourceLocation End)
+      : Start(Start), End(End) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
new file mode 100644
index 000000000000000..7b76d2d573c23bd
--- /dev/null
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -0,0 +1,78 @@
+//===--- PPEmbedParameters.h - #embed directive parameters ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parameter classes for the #embed directive.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+
+#include "clang/Lex/PPDirectiveParameter.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+/// Preprocessor extension embed parameter "clang::offset"
+/// `clang::offset( constant-expression )`
+class PPEmbedParameterOffset : public PPDirectiveParameter {
+public:
+  size_t Offset; ///< Value given to the clang::offset parameter.
+
+  PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
+                         SourceLocation End)
+      : PPDirectiveParameter(Start, End), Offset(Offset) {}
+};
+
+/// Preprocessor standard embed parameter "limit"
+/// `limit( constant-expression )`
+class PPEmbedParameterLimit : public PPDirectiveParameter {
+public:
+  size_t Limit; ///< Value given to the limit parameter.
+
+  PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
+      : PPDirectiveParameter(Start, End), Limit(Limit) {}
+};
+
+/// Preprocessor standard embed parameter "prefix"
+/// `prefix( balanced-token-seq )`
+class PPEmbedParameterPrefix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to the prefix parameter.
+
+  PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                         SourceLocation End)
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "suffix"
+/// `suffix( balanced-token-seq )`
+class PPEmbedParameterSuffix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to the suffix parameter.
+
+  PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                         SourceLocation End)
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "if_empty"
+/// `if_empty( balanced-token-seq )`
+class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to the if_empty parameter.
+
+  PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                          SourceLocation End)
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 7470bf5882730cb..58012fb79559e22 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,6 +29,7 @@
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PPEmbedParameters.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/APSInt.h"
@@ -1165,6 +1166,9 @@ class Preprocessor {
 
   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
 
+  /// Buffers for used #embed directives
+  std::vector<std::string> EmbedBuffers;
+
 public:
   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                DiagnosticsEngine &diags, const LangOptions &LangOpts,
@@ -1735,15 +1739,15 @@ class Preprocessor {
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
   struct LexEmbedParametersResult {
-    bool Successful;
-    std::optional<size_t> MaybeLimitParam;
-    std::optional<size_t> MaybeOffsetParam;
-    std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
-    std::optional<SmallVector<Token, 2>> MaybePrefixParam;
-    std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
-    int UnrecognizedParams;
+    std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+    std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+    std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+    std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+    std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
     SourceLocation StartLoc;
     SourceLocation EndLoc;
+    int UnrecognizedParams;
+    bool Successful;
   };
 
   LexEmbedParametersResult LexEmbedParameters(Token &Current,
@@ -1812,7 +1816,8 @@ class Preprocessor {
   /// Parses a simple integer literal to get its numeric value.  Floating
   /// point literals and user defined literals are rejected.  Used primarily to
   /// handle pragmas that accept integer arguments.
-  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
+  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+                                 bool WithLex = true);
 
   /// Disables macro expansion everywhere except for preprocessor directives.
   void SetMacroExpansionOnlyInDirectives() {
@@ -2441,8 +2446,7 @@ class Preprocessor {
   /// reference is for system \#include's or not (i.e. using <> instead of "").
   OptionalFileEntryRef
   LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
-                  bool OpenFile,
-                  const FileEntry *LookupFromFile = nullptr,
+                  bool OpenFile, const FileEntry *LookupFromFile = nullptr,
                   SmallVectorImpl<char> *SearchPath = nullptr,
                   SmallVectorImpl<char> *RelativePath = nullptr);
 
@@ -2735,12 +2739,18 @@ class Preprocessor {
   // Binary data inclusion
   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                             const FileEntry *LookupFromFile = nullptr);
-  void HandleEmbedDirectiveNaive(
-      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
-      StringRef BinaryContents, const size_t TargetCharWidth);
-  void HandleEmbedDirectiveBuiltin(
-      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
-      StringRef BinaryContents, const size_t TargetCharWidth);
+  void HandleEmbedDirectiveNaive(SourceLocation HashLoc,
+                                 SourceLocation FilenameTok,
+                                 const LexEmbedParametersResult &Params,
+                                 StringRef BinaryContents,
+                                 const size_t TargetCharWidth);
+  void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc,
+                                   const Token &FilenameTok,
+                                   StringRef ResolvedFilename,
+                                   StringRef SearchPath, StringRef RelativePath,
+                                   const LexEmbedParametersResult &Params,
+                                   StringRef BinaryContents,
+                                   const size_t TargetCharWidth);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2ebd21090ae4e11..d3c62d8e75650eb 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5981,6 +5981,10 @@ class Sema final {
                         ArrayRef<Expr *> Arg, SourceLocation RParenLoc,
                         Expr *Config = nullptr, bool IsExecConfig = false,
                         ADLCallKind UsesADL = ADLCallKind::NotADL);
+  /// `Fn` may be a null pointer.
+  void ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                               SmallVectorImpl<Expr *> &ArgExprs,
+                               SourceLocation RParenLoc);
 
   ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                      MultiExprArg ExecConfig,
@@ -6098,6 +6102,35 @@ class Sema final {
                                 SourceLocation BuiltinLoc,
                                 SourceLocation RPLoc);
 
+  // __builtin_pp_embed()
+  ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+                              SourceLocation Base64DataLocation,
+                              SourceLocation RPLoc, StringLiteral *Filename,
+                              QualType DataTy, std::vector<char> BinaryData);
+
+  IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed);
+
+  PPEmbedExpr::Action
+  CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+                              std::optional<QualType> MaybeInitType);
+  PPEmbedExpr::Action
+  ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+                              SmallVectorImpl<Expr *> &OutputExprList,
+                              bool ClearOutputFirst = true);
+  PPEmbedExpr::Action
+  ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &OutputList);
+
+  enum PPEmbedExprContext {
+    PPEEC__StaticAssert,
+    PPEEC_StaticAssert,
+  };
+
+  StringRef GetLocationName(PPEmbedExprContext Context) const;
+
+  bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+                           PPEmbedExprContext Context,
+                           bool SingleAllowed = true);
+
   // Build a potentially resolved SourceLocExpr.
   ExprResult BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
                                 QualType ResultTy, SourceLocation BuiltinLoc,
@@ -8290,6 +8323,10 @@ class Sema final {
                                        SourceLocation EqualLoc,
                                        ParsedTemplateArgument DefaultArg);
 
+  void ModifyTemplateArguments(
+      const TemplateTy &Template,
+      SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs);
+
   TemplateParameterList *
   ActOnTemplateParameterList(unsigned Depth,
                              SourceLocation ExportLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5c32fbc079c9a65..138c52bc8149fc8 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1715,6 +1715,9 @@ enum StmtCode {
   /// A SourceLocExpr record.
   EXPR_SOURCE_LOC,
 
+  /// A PPEmbedExpr record.
+  EXPR_BUILTIN_PP_EMBED,
+
   /// A ShuffleVectorExpr record.
   EXPR_SHUFFLE_VECTOR,
 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 4bfc4f082cd6a69..f0c0359cd9feaf9 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2392,6 +2392,21 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
   llvm_unreachable("unhandled case");
 }
 
+PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy,
+                         StringLiteral *Filename, StringLiteral *BinaryData,
+                         SourceLocation BLoc, SourceLocation RParenLoc,
+                         DeclContext *ParentContext)
+    : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary),
+      BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext),
+      Filename(Filename), BinaryData(BinaryData) {
+  setDependence(ExprDependence::None);
+}
+
+size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const {
+  return getDataStringLiteral()->getByteLength() /
+         (Context.getTypeSize(getType()) / Context.getTypeSize(Context.CharTy));
+}
+
 InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
                            ArrayRef<Expr *> initExprs, SourceLocation rbraceloc)
     : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary),
@@ -3610,6 +3625,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case CXXUuidofExprClass:
   case OpaqueValueExprClass:
   case SourceLocExprClass:
+  case PPEmbedExprClass:
   case ConceptSpecializationExprClass:
   case RequiresExprClass:
   case SYCLUniqueStableNameExprClass:
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index ffa7c6802ea6e19..fbbbd72b1445716 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
   case Expr::RequiresExprClass:
     return Cl::CL_PRValue;
 
+  case Expr::PPEmbedExprClass:
+    // Nominally, this just goes through as a PRValue until we actually expand
+    // it and check it.
+    return Cl::CL_PRValue;
+
   // Make HLSL this reference-like
   case Expr::CXXThisExprClass:
     return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5539dedec02a4b..b6967cc97d78c5d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -8921,6 +8921,11 @@ class PointerExprEvaluator
     return true;
   }
 
+  bool VisitPPEmbedExpr(const PPEmbedExpr *E) {
+    // Evaluation is not implemented yet; report failure rather than trap.
+    return Error(E);
+  }
+
   bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
     std::string ResultStr = E->ComputeName(Info.Ctx);
 
@@ -16166,6 +16171,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
       return ICEDiag(IK_NotICE, E->getBeginLoc());
     return CheckICE(cast<CastExpr>(E)->getSubExpr(), Ctx);
   }
+  case Expr::PPEmbedExprClass: {
+    return ICEDiag(IK_ICE, E->getBeginLoc());
+  }
   }
 
   llvm_unreachable("Invalid StmtClass!");
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 23ec35cae4b7b40..f08fb766efd777d 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4721,6 +4721,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
   case Expr::PseudoObjectExprClass:
   case Expr::AtomicExprClass:
   case Expr::SourceLocExprClass:
+  case Expr::PPEmbedExprClass:
   case Expr::BuiltinBitCastExprClass:
   {
     NotPrimaryExpr();
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index a31aa0cfeeed8de..f94386be7788474 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -49,6 +49,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1145,6 +1146,12 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
   OS << Node->getBuiltinStr() << "()";
 }
 
+void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) {
+  OS << "__builtin_pp_embed(" << Node->getType() << ", "
+     << Node->getFilenameStringLiteral()->getBytes() << ", \""
+     << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")";
+}
+
 void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
   PrintExpr(Node->getSubExpr());
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 22b6855b0fff23c..0be044f54a819ee 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2284,6 +2284,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) {
   VisitExpr(E);
 }
 
+void StmtProfiler::VisitPPEmbedExpr(const PPEmbedExpr *E) { VisitExpr(E); }
+
 void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); }
 
 void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) {
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index e0e80b5e0fbedbe..d8a5b56438ad33d 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -549,7 +549,6 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
   if (MaybeLimit)
     FileSize = *MaybeLimit;
 
-
   // If there's a high enough chance that the file have changed since we
   // got its size, force a stat before opening it.
   if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d2b5426d27bb3b2..96ac3663ca6658b 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -422,8 +422,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // collisions (if there were, the switch below would complain about duplicate
   // case values).  Note that this depends on 'if' being null terminated.
 
-#define HASH(LEN, FIRST, THIRD) \
-  (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
+#define HASH(LEN, FIRST, THIRD)                                                \
+  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
 #define CASE(LEN, FIRST, THIRD, NAME) \
   case HASH(LEN, FIRST, THIRD): \
     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,7 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
-  CASE( 5, 'e', 'b', embed);
+  CASE(5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fc2f749a34fc471..53a92502b463b57 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
+                   options::OPT_F, options::OPT_index_header_map,
+                   options::OPT_EmbedPath_Group});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index e405a9085951dc0..0a3c16f3a669c70 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1399,8 +1399,7 @@ class AnnotatingParser {
       if (Tok->isOneOf(Keywords.kw___has_include,
                        Keywords.kw___has_include_next)) {
         parseHasInclude();
-      }
-      else if (Tok->is(Keywords.kw___has_embed)) {
+      } else if (Tok->is(Keywords.kw___has_embed)) {
         parseHasEmbed();
       }
       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 10558b1d34bf623..04ddb92ff7f7b67 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,11 +65,11 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
-  void EmbedDirective(SourceLocation HashLoc,
-                          StringRef FileName, bool IsAngled,
-                          CharSourceRange FilenameRange, CharSourceRange ParametersRange,
-                          OptionalFileEntryRef File, StringRef SearchPath,
-                          StringRef RelativePath) override {
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
     if (!File)
       DepCollector.maybeAddDependency(FileName,
                                       /*FromModule*/ false,
@@ -97,14 +97,13 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
   }
 
   void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
-                  OptionalFileEntryRef File) override {
+                OptionalFileEntryRef File) override {
     if (!File)
       return;
     StringRef Filename =
         llvm::sys::path::remove_leading_dotslash(File->getName());
     DepCollector.maybeAddDependency(Filename,
-                                    /*FromModule=*/false,
-                                    false,
+                                    /*FromModule=*/false, false,
                                     /*IsModuleFile=*/false,
                                     &PP.getFileManager(),
                                     /*IsMissing=*/false);
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 683f751a94244ec..4049a5245de7d34 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -53,7 +53,7 @@ class DependencyGraphCallback : public PPCallbacks {
   DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
                           StringRef SysRoot,
                           DirectiveBehavior Action = IgnoreEmbed)
-    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+      : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index fb9baa92e6836d3..1d93ad97305da87 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -107,9 +107,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
 public:
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
-                           bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
-                           bool UseLineDirectives, bool MinimizeWhitespace,
-                           bool DirectivesOnly, bool KeepSystemIncludes)
+                           bool defines, bool DumpIncludeDirectives,
+                           bool DumpEmbedDirectives, bool UseLineDirectives,
+                           bool MinimizeWhitespace, bool DirectivesOnly,
+                           bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
@@ -414,7 +415,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective(
   if (DumpEmbedDirectives) {
     MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
     *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
-       << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+        << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
     setEmittedDirectiveOnThisLine();
   }
 }
@@ -1002,8 +1003,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
-      Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+      Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+      Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
   // the majority of pragmas in such a file will be Microsoft pragmas.
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 7968c62cbd3e7b3..e2e55daa77b854a 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -566,6 +566,7 @@ class RuntimeInterfaceBuilder
         CStyleCastPtrExpr(S, Ctx.VoidPtrTy, (uintptr_t)Ty.getAsOpaquePtr());
     // The QualType parameter `OpaqueType`, represented as `void*`.
     Args.push_back(TypeArg);
+    S.ModifyCallExprArguments(nullptr, E->getBeginLoc(), Args, E->getEndLoc());
 
     // We push the last parameter based on the type of the Expr. Note we need
     // special care for rvalue struct.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index feed1b9ecd71a8d..b55b4c360d44298 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -417,6 +417,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
     }
   }
 
+  // NOTE: this is to prevent a few cases where token streams with
+  // commas are used to print with pseudo-locations after a faux-expansion
+  // cause reading a bogus location from a source file that does not exist.
+  if (Tok.is(tok::comma)) {
+    Buffer = ",";
+    return 1;
+  }
+
   // NOTE: this can be checked even after testing for an IdentifierInfo.
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e0d98d7ca03fa11..1696c1a40c3d46b 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -42,11 +42,13 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <algorithm>
 #include <cassert>
+#include <cmath>
 #include <cstring>
 #include <new>
 #include <optional>
@@ -3631,10 +3633,12 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
   SmallVector<Token, 2> ParameterTokens;
   tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
   Result.StartLoc = CurTok.getLocation();
+  Result.EndLoc = CurTok.getLocation();
   for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
     Parameter.clear();
     // Lex identifier [:: identifier ...]
     if (!CurTok.is(tok::identifier)) {
+      Result.EndLoc = CurTok.getEndLoc();
       Diag(CurTok, diag::err_expected) << "identifier";
       DiscardUntilEndOfDirective();
       return Result;
@@ -3647,6 +3651,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       Parameter.append("::");
       LexNonComment(CurTok);
       if (!CurTok.is(tok::identifier)) {
+        Result.EndLoc = CurTok.getEndLoc();
         Diag(CurTok, diag::err_expected) << "identifier";
         DiscardUntilEndOfDirective();
         return Result;
@@ -3670,25 +3675,19 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
         return Result;
       }
       const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
-      const bool ValueDoesNotFit =
-          LimitResult.getBitWidth() > 64
-              ? true
-              : (LimitResult.isUnsigned() ||
-                 (LimitResult.isSigned() && LimitResult.isNegative()));
-      if (ValueDoesNotFit) {
+      if (LimitResult.getBitWidth() > 64) {
         Diag(CurTok, diag::warn_pp_expr_overflow);
-        // just truncate and roll with that, I guess?
-        Result.MaybeLimitParam =
-            static_cast<size_t>(LimitResult.getRawData()[0]);
-      } else {
-        Result.MaybeLimitParam =
-            static_cast<size_t>(LimitResult.getZExtValue());
       }
+      size_t LimitValue = 0;
+      LimitValue = LimitResult.getLimitedValue();
+      Result.MaybeLimitParam = PPEmbedParameterLimit{
+          LimitValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
       LexNonComment(CurTok);
     } else if (Parameter == "clang::offset") {
       // we have a limit parameter and its internals are processed using
       // evaluation rules from #if - handle here
       if (CurTok.isNot(tok::l_paren)) {
+        Result.EndLoc = CurTok.getEndLoc();
         Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
         DiscardUntilEndOfDirective();
         return Result;
@@ -3697,18 +3696,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       DirectiveEvalResult OffsetEvalResult =
           EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
       if (!OffsetEvalResult.Value) {
+        Result.EndLoc = CurTok.getEndLoc();
         return Result;
       }
       const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+      size_t OffsetValue;
       if (OffsetResult.getBitWidth() > 64) {
         Diag(CurTok, diag::warn_pp_expr_overflow);
-        // just truncate and roll with that, I guess?
-        Result.MaybeOffsetParam =
-            static_cast<size_t>(OffsetResult.getRawData()[0]);
-      } else {
-        Result.MaybeOffsetParam =
-            static_cast<size_t>(OffsetResult.getZExtValue());
       }
+      OffsetValue = OffsetResult.getLimitedValue();
+      Result.MaybeOffsetParam = PPEmbedParameterOffset{
+          OffsetValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
       LexNonComment(CurTok);
     } else {
       if (CurTok.is(tok::l_paren)) {
@@ -3764,6 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
           return true;
         };
         if (!ParseArgToken()) {
+          Result.EndLoc = CurTok.getEndLoc();
           return Result;
         }
         if (!CurTok.is(tok::r_paren)) {
@@ -3775,14 +3774,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       }
       // "Token-soup" parameters
       if (Parameter == "if_empty") {
-        // TODO: integer list optimization
-        Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+        Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else if (Parameter == "prefix") {
-        // TODO: integer list optimization
-        Result.MaybePrefixParam = std::move(ParameterTokens);
+        Result.MaybePrefixParam = PPEmbedParameterPrefix{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else if (Parameter == "suffix") {
-        // TODO: integer list optimization
-        Result.MaybeSuffixParam = std::move(ParameterTokens);
+        Result.MaybeSuffixParam = PPEmbedParameterSuffix{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else {
         ++Result.UnrecognizedParams;
         if (DiagnoseUnknown) {
@@ -3793,6 +3795,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
     }
   }
   Result.Successful = true;
+  Result.EndLoc = CurTok.getEndLoc();
   return Result;
 }
 
@@ -3823,89 +3826,327 @@ inline constexpr const char *IntegerLiterals[] = {
     "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
     "253", "254", "255"};
 
-void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
-                                              LexEmbedParametersResult &Params,
-                                              StringRef BinaryContents,
-                                              const size_t TargetCharWidth) {
-  (void)TargetCharWidth; // for later, when we support various sizes
-  size_t TokenIndex = 0;
-  const size_t InitListTokensSize = [&]() {
-    if (BinaryContents.empty()) {
-      if (Params.MaybeIfEmptyParam) {
-        return Params.MaybeIfEmptyParam->size();
+/// Estimates the character count of the naive `#embed` expansion and grows
+/// \p TokSpellingBuffer to fit the longest parameter token it will spell.
+static size_t
+ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params,
+                        StringRef TypeName, StringRef BinaryContents,
+                        SmallVectorImpl<char> &TokSpellingBuffer) {
+  size_t ReserveSize = 0;
+  if (BinaryContents.empty()) {
+    if (Params.MaybeIfEmptyParam) {
+      for (const auto &Tok : Params.MaybeIfEmptyParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size())
+          TokSpellingBuffer.resize(TokLen);
+        ReserveSize += TokLen;
+      }
+    }
+  } else {
+    if (Params.MaybePrefixParam) {
+      for (const auto &Tok : Params.MaybePrefixParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size())
+          TokSpellingBuffer.resize(TokLen);
+        ReserveSize += TokLen;
+      }
+    }
+    for (const unsigned char Byte : BinaryContents) {
+      ReserveSize += 3 + TypeName.size(); // ((type-name)
+      if (Byte > 99) {
+        ReserveSize += 3; // ###
+      } else if (Byte > 9) {
+        ReserveSize += 2; // ##
       } else {
-        return static_cast<size_t>(0);
+        ReserveSize += 1; // #
       }
-    } else {
-      return static_cast<size_t>(
-          (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) +
-          (BinaryContents.size() * 2 - 1) +
-          (Params.MaybeSuffixParam ? Params.MaybeSuffixParam->size() : 0));
+      ReserveSize += 2; // ),
     }
-  }();
-  std::unique_ptr<Token[]> InitListTokens(new Token[InitListTokensSize]());
+    // Suffix tokens are appended after the last byte of data.
+    if (Params.MaybeSuffixParam) {
+      for (const auto &Tok : Params.MaybeSuffixParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size())
+          TokSpellingBuffer.resize(TokLen);
+        ReserveSize += TokLen;
+      }
+    }
+  }
+  return ReserveSize;
+}
 
+/// Expands `#embed` by spelling the data (or the if_empty tokens) out as
+/// source text in a fresh in-memory buffer and entering it as a source file.
+void Preprocessor::HandleEmbedDirectiveNaive(
+    SourceLocation HashLoc, SourceLocation FilenameLoc,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents,
+    const size_t TargetCharWidth) {
+  // Load up a new embed buffer for this file and set of parameters in
+  // particular.
+  EmbedBuffers.push_back("");
+  // Build the name eagerly as a std::string: a Twine only references its
+  // operands and must not outlive the expression that creates it.
+  const std::string EmbedBufferName =
+      ("<built-in:embed:" + llvm::Twine(EmbedBuffers.size()) + ">").str();
+  std::string &TargetEmbedBuffer = EmbedBuffers.back();
+  const size_t TotalSize = BinaryContents.size();
+  // In the future, this might change/improve.
+  const StringRef TypeName = "unsigned char";
+
+  SmallVector<char, 32> TokSpellingBuffer(32, 0);
+  const size_t ReserveSize = ComputeNaiveReserveSize(
+      Params, TypeName, BinaryContents, TokSpellingBuffer);
+  TargetEmbedBuffer.reserve(ReserveSize);
+
+  // Generate the look-alike source file
   if (BinaryContents.empty()) {
     if (Params.MaybeIfEmptyParam) {
-      std::copy(Params.MaybeIfEmptyParam->begin(),
-                Params.MaybeIfEmptyParam->end(), InitListTokens.get());
-      TokenIndex += Params.MaybeIfEmptyParam->size();
-      assert(TokenIndex == InitListTokensSize);
-      EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true,
-                       true);
+      const PPEmbedParameterIfEmpty &EmptyParam = *Params.MaybeIfEmptyParam;
+      for (const auto &Tok : EmptyParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
+    }
+  } else {
+    if (Params.MaybePrefixParam) {
+      const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
+      for (const auto &Tok : PrefixParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
+    }
+    for (size_t I = 0; I < TotalSize; ++I) {
+      unsigned char ByteValue = BinaryContents[I];
+      StringRef ByteRepresentation = IntegerLiterals[ByteValue];
+      TargetEmbedBuffer.append(2, '(');
+      TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+      TargetEmbedBuffer.append(1, ')');
+      TargetEmbedBuffer.append(ByteRepresentation.data(),
+                               ByteRepresentation.size());
+      TargetEmbedBuffer.append(1, ')');
+      bool AtEndOfContents = I == (TotalSize - 1);
+      if (!AtEndOfContents) {
+        TargetEmbedBuffer.append(1, ',');
+      }
+    }
+    if (Params.MaybeSuffixParam) {
+      const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
+      for (const auto &Tok : SuffixParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
     }
-    return;
   }
 
-  // FIXME: this does not take the target's byte size into account;
-  // will fail on many DSPs and embedded machines!
+  // Create faux-file and its ID, backed by a memory buffer.
+  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+  assert(EmbedBufferFID.isValid() &&
+         "Could not create FileID for #embed directive?");
+  // Start parsing the look-alike source file for the embed directive and
+  // pretend everything is normal
+  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
+}
+
+/// Checks whether \p Tokens is a plain comma-separated list of integer
+/// literals, each of which fits in a single byte, and appends the parsed
+/// bytes to \p Output. A prefix list must begin with a literal and end with
+/// a comma; a suffix list must begin with a comma and end with a literal.
+/// An empty list is accepted. \p Output may be partially filled on failure.
+static bool TokenListIsCharacterArray(Preprocessor &PP,
+                                      const size_t TargetCharWidth,
+                                      bool IsPrefix,
+                                      const SmallVectorImpl<Token> &Tokens,
+                                      llvm::SmallVectorImpl<char> &Output) {
+  const bool IsSuffix = !IsPrefix;
+  // Use integer arithmetic for the limit: std::pow goes through floating
+  // point and its conversion back is undefined for widths of 64 or more.
+  const uint64_t MaxValue = TargetCharWidth >= 64
+                                ? ~uint64_t(0)
+                                : (uint64_t(1) << TargetCharWidth) - 1;
+  // if it's a suffix, we are expecting a comma first
+  // if it's a prefix, we are expecting a numeric literal first
+  bool ExpectingNumericLiteral = IsPrefix;
+  if (Tokens.empty())
+    return true;
+  for (const Token &Tok : Tokens) {
+    // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the
+    // literals, since the Spec technically decrees each element is of type
+    // `unsigned char` (unless we have a potential future extension for
+    // `clang::type(meow)` as an embed parameter
+    if (ExpectingNumericLiteral) {
+      if (Tok.isNot(tok::numeric_constant))
+        return false;
+      uint64_t Value = 0;
+      Token ParsingTok = Tok;
+      if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false)) {
+        // numeric literal is a floating point literal or a UDL; too complex for
+        // us
+        return false;
+      }
+      if (Value > MaxValue || Value > static_cast<uint64_t>(0xFF)) {
+        // number is too large
+        return false;
+      }
+      Output.push_back((char)Value);
+    } else if (Tok.isNot(tok::comma)) {
+      return false;
+    }
+    ExpectingNumericLiteral = !ExpectingNumericLiteral;
+  }
+  const bool EndedOnNumber = !ExpectingNumericLiteral;
+  if (IsPrefix && EndedOnNumber) {
+    // we ended on a number: this is a failure for prefix!
+    return false;
+  }
+  const bool EndedOnComma = ExpectingNumericLiteral;
+  if (IsSuffix && EndedOnComma) {
+    // we ended on a comma: this is a failure for suffix!
+    return false;
+  }
+  // Every token was consumed by the loop above, so the list is well-formed.
+  return true;
+}
+
+/// Base64-encodes the concatenation of \p Bytes0, \p Bytes1 and \p Bytes2
+/// (standard alphabet, '=' padding) and appends it to \p OutputBuffer.
+static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
+                               StringRef Bytes2, std::string &OutputBuffer) {
+  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                              "abcdefghijklmnopqrstuvwxyz"
+                              "0123456789+/";
+  const size_t FirstEnd = Bytes0.size();
+  const size_t SecondEnd = FirstEnd + Bytes1.size();
+  const size_t InputSize = SecondEnd + Bytes2.size();
+  // Grow the output once; every group of up to 3 bytes becomes 4 characters.
+  size_t Out = OutputBuffer.size();
+  OutputBuffer.resize(Out + ((InputSize + 2) / 3) * 4);
+  // Reads byte `Pos` of the three inputs as if they were one string.
+  auto ByteAt = [&](size_t Pos) -> uint32_t {
+    if (Pos < FirstEnd)
+      return (unsigned char)Bytes0[Pos];
+    if (Pos < SecondEnd)
+      return (unsigned char)Bytes1[Pos - FirstEnd];
+    return (unsigned char)Bytes2[Pos - SecondEnd];
+  };
+  // Writes the sextet of `Group` that starts at bit `Shift`.
+  auto Emit = [&](uint32_t Group, int Shift) {
+    OutputBuffer[Out++] = Table[(Group >> Shift) & 63];
+  };
+  size_t In = 0;
+  for (const size_t FullEnd = InputSize / 3 * 3; In < FullEnd; In += 3) {
+    const uint32_t Group =
+        (ByteAt(In) << 16) | (ByteAt(In + 1) << 8) | ByteAt(In + 2);
+    for (int Shift = 18; Shift >= 0; Shift -= 6)
+      Emit(Group, Shift);
+  }
+  if (const size_t Remaining = InputSize - In) {
+    uint32_t Group = ByteAt(In) << 16;
+    if (Remaining == 2)
+      Group |= ByteAt(In + 1) << 8;
+    Emit(Group, 18);
+    Emit(Group, 12);
+    if (Remaining == 2)
+      Emit(Group, 6);
+    else
+      OutputBuffer[Out++] = '=';
+    OutputBuffer[Out++] = '=';
+  }
+}
+
+/// Expands `#embed` into a `__builtin_pp_embed(...)` call carrying base64
+/// data; falls back to the naive expansion when that form cannot be used.
+void Preprocessor::HandleEmbedDirectiveBuiltin(
+    SourceLocation HashLoc, const Token &FilenameTok,
+    StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents,
+    const size_t TargetCharWidth) {
+  // if it's empty, just process it like a normal expanded token stream
+  if (BinaryContents.empty()) {
+    HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                              BinaryContents, TargetCharWidth);
+    return;
+  }
+  SmallVector<char, 2> BinaryPrefix, BinarySuffix;
   if (Params.MaybePrefixParam) {
-    std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(),
-              InitListTokens.get() + TokenIndex);
-    TokenIndex += Params.MaybePrefixParam->size();
-  }
-  for (size_t I = 0; I < BinaryContents.size(); ++I) {
-    unsigned char ByteValue = BinaryContents[I];
-    StringRef ByteRepresentation = IntegerLiterals[ByteValue];
-    const size_t InitListIndex = TokenIndex;
-    Token &IntToken = InitListTokens[InitListIndex];
-    IntToken.setKind(tok::numeric_constant);
-    IntToken.setLiteralData(ByteRepresentation.data());
-    IntToken.setLength(ByteRepresentation.size());
-    IntToken.setLocation(FilenameLoc);
-    ++TokenIndex;
-    bool AtEndOfContents = I == (BinaryContents.size() - 1);
-    if (!AtEndOfContents) {
-      const size_t CommaInitListIndex = InitListIndex + 1;
-      Token &CommaToken = InitListTokens[CommaInitListIndex];
-      CommaToken.setKind(tok::comma);
-      CommaToken.setLocation(FilenameLoc);
-      ++TokenIndex;
+    // If we have a prefix, validate that it's a good fit for direct data
+    // embedding (and prepare to prepend it)
+    const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
+    if (!TokenListIsCharacterArray(*this, TargetCharWidth, true,
+                                   PrefixParam.Tokens, BinaryPrefix)) {
+      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                                BinaryContents, TargetCharWidth);
+      return;
     }
   }
   if (Params.MaybeSuffixParam) {
-    std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(),
-              InitListTokens.get() + TokenIndex);
-    TokenIndex += Params.MaybeSuffixParam->size();
+    // If we have a suffix, validate that it's a good fit for direct data
+    // embedding (and prepare to append it)
+    const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
+    if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
+                                   SuffixParam.Tokens, BinarySuffix)) {
+      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                                BinaryContents, TargetCharWidth);
+      return;
+    }
   }
-  assert(TokenIndex == InitListTokensSize);
-  EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false);
-}
 
-void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
-                                               LexEmbedParametersResult &Params,
-                                               StringRef BinaryContents,
-                                               const size_t TargetCharWidth) {
-  // TODO: implement direct built-in support
-  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
-                             TargetCharWidth);
+  // Load up a new embed buffer for this file and set of parameters in
+  // particular.
+  EmbedBuffers.push_back("");
+  // Build the name eagerly as a std::string: a Twine only references its
+  // operands and must not outlive the expression that creates it.
+  const std::string EmbedBufferName =
+      ("<built-in:embed:" + llvm::Twine(EmbedBuffers.size()) + ">").str();
+  std::string &TargetEmbedBuffer = EmbedBuffers.back();
+  StringRef TypeName = "unsigned char";
+  const size_t TotalSize =
+      BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size();
+  const size_t ReserveSize =        // add up for necessary size:
+      19                            // __builtin_pp_embed(
+      + TypeName.size()             // type-name
+      + 2                           // ,"
+      + ResolvedFilename.size()     // file-name
+      + 3                           // ","
+      + (((TotalSize + 2) / 3) * 4) // base64-string
+      + 2;                          // ")
+  // Reserve appropriate size
+  TargetEmbedBuffer.reserve(ReserveSize);
+
+  // Generate the look-alike source file
+  TargetEmbedBuffer.append("__builtin_pp_embed(");
+  TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+  TargetEmbedBuffer.append(",\"");
+  // NOTE(review): the file name is spliced in unescaped; a '"' or '\' in it
+  // would presumably corrupt the generated string literal -- confirm/escape.
+  TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size());
+  TargetEmbedBuffer.append("\",\"");
+  // include the prefix(...) and suffix(...) binary data in the total contents
+  TripleEncodeBase64(
+      StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents,
+      StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer);
+  TargetEmbedBuffer.append("\")");
+  // Create faux-file and its ID, backed by a memory buffer.
+  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+  assert(EmbedBufferFID.isValid() &&
+         "Could not create FileID for #embed directive?");
+  // Start parsing the look-alike source file for the embed directive and
+  // pretend everything is normal
+  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
 }
 
 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                                         const FileEntry *LookupFromFile) {
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_embed
-                                          : diag::warn_cxx26_pp_embed);
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                          : diag::warn_c23_pp_embed);
     Diag(EmbedTok, EitherDiag);
   }
 
@@ -3952,18 +4193,16 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
       return;
     }
-    Diag(FilenameTok, diag::err_pp_file_not_found)
-        << Filename;
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
     return;
   }
   std::optional<int64_t> MaybeSignedLimit{};
   if (Params.MaybeLimitParam) {
-    if (static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam) {
-      MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
-    }
+    MaybeSignedLimit = static_cast<int64_t>(Params.MaybeLimitParam->Limit);
   }
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile = getFileManager().getBufferForFile(
-      *MaybeFileRef, false, false, MaybeSignedLimit);
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
+      getFileManager().getBufferForFile(*MaybeFileRef, false, false,
+                                        MaybeSignedLimit);
   if (!MaybeFile) {
     // could not find file
     Diag(FilenameTok, diag::err_cannot_open_file)
@@ -3973,7 +4212,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   StringRef BinaryContents = MaybeFile.get()->getBuffer();
   if (Params.MaybeOffsetParam) {
     // offsets all the way to the end of the file make for an empty file.
-    const size_t OffsetParam = *Params.MaybeOffsetParam;
+    const size_t &OffsetParam = Params.MaybeOffsetParam->Offset;
     BinaryContents = BinaryContents.substr(OffsetParam);
   }
   const size_t TargetCharWidth = getTargetInfo().getCharWidth();
@@ -4009,11 +4248,12 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                               RelativePath);
   }
   if (PPOpts->NoBuiltinPPEmbed) {
-    HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+    HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents,
                               TargetCharWidth);
   } else {
     // emit a token directly, handle it internally.
-    HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+    HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath,
+                                RelativePath, Params, BinaryContents,
                                 TargetCharWidth);
   }
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 6e0163ccc89b7fb..7f6c964b0d68a3b 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1270,8 +1270,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
 int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   // pedwarn for not being on C23
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed
-                                          : diag::warn_cxx26_pp_has_embed);
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+                                          : diag::warn_c23_pp_has_embed);
     Diag(Tok, EitherDiag);
   }
 
@@ -1321,7 +1321,8 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   SourceLocation FilenameLoc = Tok.getLocation();
   Token FilenameTok = Tok;
 
-  Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
+  Preprocessor::LexEmbedParametersResult Params =
+      this->LexEmbedParameters(Tok, true, false);
   if (!Params.Successful) {
     if (Tok.isNot(tok::eod))
       this->DiscardUntilEndOfDirective();
@@ -1339,7 +1340,6 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     return VALUE__STDC_EMBED_NOT_FOUND__;
   }
 
-
   SmallString<128> FilenameBuffer;
   SmallString<256> RelativePath;
   StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
@@ -1351,11 +1351,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   assert(!Filename.empty());
   const FileEntry *LookupFromFile =
       this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
-                               : nullptr;
+                                  : nullptr;
   OptionalFileEntryRef MaybeFileEntry =
       this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
-                            LookupFromFile, nullptr,
-                            &RelativePath);
+                            LookupFromFile, nullptr, &RelativePath);
   if (Callbacks) {
     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
   }
@@ -1363,11 +1362,15 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     return VALUE__STDC_EMBED_NOT_FOUND__;
   }
   size_t FileSize = MaybeFileEntry->getSize();
-  if (FileSize == 0 ||
-      (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) {
+  if (Params.MaybeLimitParam) {
+    if (FileSize > Params.MaybeLimitParam->Limit) {
+      FileSize = Params.MaybeLimitParam->Limit;
+    }
+  }
+  if (FileSize == 0) {
     return VALUE__STDC_EMBED_EMPTY__;
   }
-  if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) {
+  if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
     return VALUE__STDC_EMBED_EMPTY__;
   }
   return VALUE__STDC_EMBED_FOUND__;
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ede4c51487ffbe7..10eb6d268b37b1d 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1411,7 +1411,8 @@ bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
   return true;
 }
 
-bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
+bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+                                             bool WithLex) {
   assert(Tok.is(tok::numeric_constant));
   SmallString<8> IntegerBuffer;
   bool NumberInvalid = false;
@@ -1426,7 +1427,8 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
   llvm::APInt APVal(64, 0);
   if (Literal.GetIntegerValue(APVal))
     return false;
-  Lex(Tok);
+  if (WithLex)
+    Lex(Tok);
   Value = APVal.getLimitedValue();
   return true;
 }
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 9dbfc1c8c5e9ffe..ef3ae580a43aeb9 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -32,6 +32,7 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Base64.h"
 #include <optional>
 using namespace clang;
 
@@ -741,6 +742,8 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 };
 }
 
+// clang-format off
+
 /// Parse a cast-expression, or, if \pisUnaryExpression is true, parse
 /// a unary-expression.
 ///
@@ -805,6 +808,7 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [GNU]   '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
 /// [GNU]   '__null'
@@ -924,6 +928,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 ///                   '__is_rvalue_expr'
 /// \endverbatim
 ///
+
+// clang-format on
+
 ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
                                        bool isAddressOfOperand,
                                        bool &NotCastExpr,
@@ -1345,6 +1352,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
   case tok::kw___builtin_FUNCSIG:
   case tok::kw___builtin_LINE:
   case tok::kw___builtin_source_location:
+  case tok::kw___builtin_pp_embed:
     if (NotPrimaryExpression)
       *NotPrimaryExpression = true;
     // This parses the complete suffix; we can return early.
@@ -2145,6 +2153,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
       } else {
         Expr *Fn = LHS.get();
         SourceLocation RParLoc = Tok.getLocation();
+        Actions.ModifyCallExprArguments(Fn, Loc, ArgExprs, RParLoc);
         LHS = Actions.ActOnCallExpr(getCurScope(), Fn, Loc, ArgExprs, RParLoc,
                                     ExecConfig);
         if (LHS.isInvalid()) {
@@ -2560,6 +2569,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   return Operand;
 }
 
+// clang-format off
+
 /// ParseBuiltinPrimaryExpression
 ///
 /// \verbatim
@@ -2575,6 +2586,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [OCL]   '__builtin_astype' '(' assignment-expression ',' type-name ')'
 ///
@@ -2583,6 +2595,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
 /// [GNU]   offsetof-member-designator '.' identifier
 /// [GNU]   offsetof-member-designator '[' expression ']'
 /// \endverbatim
+
+// clang-format on
 ExprResult Parser::ParseBuiltinPrimaryExpression() {
   ExprResult Res;
   const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo();
@@ -2841,6 +2855,96 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen());
     break;
   }
+  case tok::kw___builtin_pp_embed: {
+    SourceRange DataTyExprSourceRange{};
+    TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+
+    if (ExpectAndConsume(tok::comma)) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      Res = ExprError();
+    }
+
+    ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+
+    if (ExpectAndConsume(tok::comma)) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      Res = ExprError();
+    }
+
+    ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+
+    if (Tok.isNot(tok::r_paren)) {
+      Diag(Tok, diag::err_expected) << tok::r_paren;
+      Res = ExprError();
+    }
+
+    const ASTContext &Context = Actions.getASTContext();
+    QualType DataTy = Context.UnsignedCharTy;
+    size_t TargetWidth = Context.getTypeSize(DataTy);
+    if (DataTyExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      DataTy = DataTyExpr.get().get().getCanonicalType();
+      TargetWidth = Context.getTypeSize(DataTy);
+      if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+          DataTy.getUnqualifiedType() != Context.CharTy) {
+        // TODO: check if is exactly the same as unsigned char
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "only 'char' and 'unsigned char' are supported";
+        Res = ExprError();
+      }
+      if ((TargetWidth % CHAR_BIT) != 0) {
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "width of element type is not a multiple of host platform's "
+               "CHAR_BIT!";
+        Res = ExprError();
+      }
+    }
+
+    StringLiteral *FilenameLiteral = nullptr;
+    if (FilenameArgExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+    }
+
+    std::vector<char> BinaryData{};
+    if (Base64ArgExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+      StringRef Base64StrData = Base64Str->getBytes();
+      if (Base64Str->getKind() != StringLiteral::Ordinary) {
+        Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+            << 0
+            << "'__builtin_pp_embed' with valid base64 encoding that is an "
+               "ordinary \"...\" string";
+      }
+      const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+        Diag(Base64Str->getExprLoc(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "expected a valid base64 encoded string";
+      };
+      llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+      llvm::handleAllErrors(std::move(Err), OnDecodeError);
+      if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "size of data does not split evently into the number of bytes "
+               "requested";
+        Res = ExprError();
+      }
+    }
+
+    if (!Res.isInvalid()) {
+      Res = Actions.ActOnPPEmbedExpr(
+          StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(),
+          FilenameLiteral, DataTy, std::move(BinaryData));
+    }
+    break;
+  }
   }
 
   if (Res.isInvalid())
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index f556d0e6d4f8b6e..8364519861fe4f3 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1671,6 +1671,8 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
     // arguments.
   } while (TryConsumeToken(tok::comma));
 
+  Actions.ModifyTemplateArguments(Template, TemplateArgs);
+
   return false;
 }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f249d41bc9bfbb6..44d8ddba080d82e 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13336,6 +13336,54 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     return;
   }
 
+  // Adjust the init expression for PPEmbedExpr as early as possible
+  // here.
+  bool AlreadyAdjustedPPEmbedExpr = false;
+  if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init); ILExpr) {
+    QualType VDeclTy = VDecl->getType();
+    ArrayRef<Expr *> Inits = ILExpr->inits();
+    if (CheckExprListForPPEmbedExpr(Inits, VDeclTy) == PPEmbedExpr::FoundOne) {
+      PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Inits[0]);
+      ILExpr->setInit(0, PPEmbed->getDataStringLiteral());
+      AlreadyAdjustedPPEmbedExpr = true;
+    }
+  }
+
+  if (!AlreadyAdjustedPPEmbedExpr) {
+    // If there is a PPEmbedExpr as a single initializer without braces,
+    // make sure it only produces a single element (and then expand said
+    // element).
+    if (PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Init);
+        PPEmbed) {
+      if (PPEmbed->getDataElementCount(Context) == 1) {
+        // Expand the list in-place immediately, let the natural work take hold
+        Init = ExpandSinglePPEmbedExpr(PPEmbed);
+      } else {
+        // `__builtin_pp_embed( ... )` only produces 2 or more values.
+        Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
+            << "'__builtin_pp_embed'";
+        RealDecl->setInvalidDecl();
+        return;
+      }
+    }
+
+    // Legitimately, in all other cases, COMPLETELY nuke the PPEmbedExpr
+    // and turn it into a list of integers where applicable.
+    if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init);
+        ILExpr) {
+      ArrayRef<Expr *> Inits = ILExpr->inits();
+      SmallVector<Expr *, 4> OutputExprList{};
+      if (ExpandPPEmbedExprInExprList(Inits, OutputExprList, false) ==
+          PPEmbedExpr::Expanded) {
+        ILExpr->resizeInits(Context, OutputExprList.size());
+        for (size_t I = 0; I < OutputExprList.size(); ++I) {
+          auto &InitExpr = OutputExprList[I];
+          ILExpr->setInit(I, InitExpr);
+        }
+      }
+    }
+  }
+
   // WebAssembly tables can't be used to initialise a variable.
   if (Init && !Init->getType().isNull() &&
       Init->getType()->isWebAssemblyTableType()) {
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index f9c010b1a002488..37321d2417a7d2e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17022,7 +17022,8 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
                                          SourceLocation RParenLoc) {
   if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
     return nullptr;
-
+  if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert))
+    return nullptr;
   return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
                                       AssertMessageExpr, RParenLoc, false);
 }
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 75730ea888afb41..ebeed7f4d2b485e 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1412,6 +1412,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Expr::SizeOfPackExprClass:
   case Expr::StringLiteralClass:
   case Expr::SourceLocExprClass:
+  case Expr::PPEmbedExprClass:
   case Expr::ConceptSpecializationExprClass:
   case Expr::RequiresExprClass:
     // These expressions can never throw.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cf45fc388083ce6..c10e6501daef6e2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7110,6 +7110,13 @@ static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S,
       << FixItHint::CreateInsertion(DRE->getLocation(), "std::");
 }
 
+void Sema::ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                                   SmallVectorImpl<Expr *> &ArgExprs,
+                                   SourceLocation RParenLoc) {
+  // Expand any PPEmbedExpr arguments in place; the result is not needed here.
+  (void)ExpandPPEmbedExprInExprList(ArgExprs);
+}
+
 ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                                MultiExprArg ArgExprs, SourceLocation RParenLoc,
                                Expr *ExecConfig) {
@@ -7947,8 +7954,17 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
     }
   }
 
-  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
-                                               RBraceLoc);
+  InitListExpr *E = nullptr;
+  if (InitArgList.size() > 1 &&
+      CheckExprListForPPEmbedExpr(InitArgList, std::nullopt) !=
+          PPEmbedExpr::NotFound) {
+    SmallVector<Expr *, 4> OutputExprList;
+    ExpandPPEmbedExprInExprList(InitArgList, OutputExprList);
+    E = new (Context)
+        InitListExpr(Context, LBraceLoc, OutputExprList, RBraceLoc);
+  } else {
+    E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
+  }
   E->setType(Context.VoidTy); // FIXME: just a place holder for now.
   return E;
 }
@@ -17570,6 +17586,225 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
       SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
 }
 
+ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+                                  SourceLocation Base64DataLocation,
+                                  SourceLocation RPLoc, StringLiteral *Filename,
+                                  QualType ElementTy,
+                                  std::vector<char> BinaryData) {
+  uint64_t ArraySizeRawVal[] = {BinaryData.size()};
+  llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
+                                     1, ArraySizeRawVal));
+  QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
+                                                  ArrayType::Normal, 0);
+  StringLiteral *BinaryDataLiteral = StringLiteral::Create(
+      Context, StringRef(BinaryData.data(), BinaryData.size()),
+      StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation);
+  return new (Context)
+      PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
+                  RPLoc, CurContext);
+}
+
+// Turn an embed holding exactly one element into an integer literal, building
+// the value from its bytes with the lowest-order byte first.
+IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) {
+  assert(PPEmbed->getDataElementCount(Context) == 1 &&
+         "Data should only contain a single element");
+  StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+  QualType ElementTy = PPEmbed->getType();
+  const size_t TargetWidth = Context.getTypeSize(ElementTy);
+  const size_t BytesPerElement = TargetWidth / CHAR_BIT; // Not the inverse.
+  StringRef Data = DataLiteral->getBytes();
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+    if ((ValIndex % sizeof(uint64_t)) == 0)
+      ByteVals.push_back(0); // Start the next 64-bit word of the value.
+    const uint64_t DataByte = static_cast<unsigned char>(Data[ValIndex]);
+    ByteVals.back() |= DataByte << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+  }
+  ArrayRef<uint64_t> ByteValsRef(ByteVals);
+  return IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                ElementTy, DataLiteral->getBeginLoc());
+}
+
+/// Classify how \p ExprList involves PPEmbedExpr nodes: NotFound if it holds
+/// none, FoundOne if it is a single embed that can be kept whole, and Expanded
+/// if an embed is present but is not such a sole entry.
+PPEmbedExpr::Action
+Sema::CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+                                  std::optional<QualType> MaybeInitType) {
+  // An empty list trivially has no embed.
+  if (ExprList.empty())
+    return PPEmbedExpr::NotFound;
+
+  // Only a list made of exactly one embed is a candidate for FoundOne.
+  PPEmbedExpr *Sole = nullptr;
+  if (ExprList.size() == 1)
+    Sole = dyn_cast_if_present<PPEmbedExpr>(ExprList[0]);
+
+  if (Sole && !MaybeInitType) {
+    // Without a destination type there is nothing to check it against;
+    // leave it, possibly adjusted later!
+    return PPEmbedExpr::FoundOne;
+  }
+
+  if (Sole && (*MaybeInitType)->isArrayType()) {
+    // With an array destination the embed is kept only when the element
+    // types are compatible or are both character types.
+    QualType DestElemTy =
+        (*MaybeInitType)->getAsArrayTypeUnsafe()->getElementType();
+    QualType EmbedElemTy = Sole->getType();
+    if (Context.typesAreCompatible(DestElemTy, EmbedElemTy) ||
+        (DestElemTy->isCharType() && EmbedElemTy->isCharType())) {
+      return PPEmbedExpr::FoundOne;
+    }
+  }
+
+  // Every remaining case depends only on whether any embed is in the list; a
+  // sole embed that failed the checks above is reported as Expanded too.
+  for (const Expr *Candidate : ExprList) {
+    if (isa<PPEmbedExpr>(Candidate)) {
+      return PPEmbedExpr::Expanded;
+    }
+  }
+  // We didn't find one.
+  return PPEmbedExpr::NotFound;
+}
+
+/// Replace each PPEmbedExpr in \p ExprList, in place, by one IntegerLiteral per
+/// element. \returns Expanded if any embed was found, otherwise NotFound.
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &ExprList) {
+  PPEmbedExpr::Action Action = PPEmbedExpr::NotFound;
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t I = 0; I < ExprList.size();) {
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(ExprList[I]);
+    if (!PPEmbed) {
+      ++I;
+      continue;
+    }
+    // Any embed, even an empty one, alters the list, so report it.
+    Action = PPEmbedExpr::Expanded;
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0) {
+      // No ++I: after the erase, I already names the following element.
+      ExprList.erase(ExprList.begin() + I);
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Width / CHAR_BIT, not the inverse (zero for elements wider than a byte).
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    SmallVector<Expr *, 16> Literals;
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0); // Start the next 64-bit word.
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      ArrayRef<uint64_t> ByteValsRef(ByteVals);
+      Literals.push_back(
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                 ElementTy, DataLiteral->getBeginLoc()));
+    }
+    assert(Literals.size() == ExpectedDataElements);
+    // One erase plus one range insert shifts the list tail once per embed.
+    auto InsertPos = ExprList.erase(ExprList.begin() + I);
+    ExprList.insert(InsertPos, Literals.begin(), Literals.end());
+    I += Literals.size();
+  }
+  return Action;
+}
+
+/// Copy \p ExprList into \p OutputExprList with each PPEmbedExpr replaced by
+/// one IntegerLiteral per element. If the input holds no embed, nothing is
+/// appended and NotFound is returned; otherwise the result is Expanded.
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+                                  SmallVectorImpl<Expr *> &OutputExprList,
+                                  bool ClearOutputFirst) {
+  if (ClearOutputFirst)
+    OutputExprList.clear();
+  const auto IsPPEmbedExpr = [](const Expr *const SomeExpr) {
+    return isa<PPEmbedExpr>(SomeExpr);
+  };
+  if (std::none_of(ExprList.begin(), ExprList.end(), IsPPEmbedExpr))
+    return PPEmbedExpr::NotFound;
+  SmallVector<uint64_t, 4> ByteVals{};
+  // Only an estimate: an embed contributes one entry per element, not one.
+  OutputExprList.reserve(OutputExprList.size() + ExprList.size());
+  for (Expr *OriginalExpr : ExprList) {
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(OriginalExpr);
+    if (!PPEmbed) {
+      OutputExprList.push_back(OriginalExpr);
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Width / CHAR_BIT, not the inverse (zero for elements wider than a byte,
+    // which would leave the loop below stuck at ByteIndex 0).
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0); // Start the next 64-bit word.
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      ArrayRef<uint64_t> ByteValsRef(ByteVals);
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      OutputExprList.push_back(IntLit);
+    }
+  }
+  return PPEmbedExpr::Expanded;
+}
+
+StringRef Sema::GetLocationName(PPEmbedExprContext Context) const {
+  switch (Context) {
+  default:
+    llvm_unreachable("unhandled PPEmbedExprContext value");
+  case PPEEC__StaticAssert:
+    return "_Static_assert";
+  case PPEEC_StaticAssert:
+    return "static_assert";
+  }
+}
+
+/// Diagnose a PPEmbedExpr appearing where a single value is required.
+/// \returns true if an error was emitted (the caller bails out), else false.
+bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+                               PPEmbedExprContext PPEmbedContext,
+                               bool SingleAllowed) {
+  PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
+  if (!PPEmbed)
+    return false; // Not an embed: nothing to diagnose.
+  if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
+    // A one-element embed is replaced by its integer literal and accepted.
+    E = ExpandSinglePPEmbedExpr(PPEmbed);
+    return false;
+  }
+  StringRef DiagnosticMessage =
+      (SingleAllowed ? "cannot use a preprocessor embed that expands to "
+                       "nothing or expands to "
+                       "more than one item in "
+                     : "cannot use a preprocessor embed in ");
+  Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
+      << DiagnosticMessage << 1 << GetLocationName(PPEmbedContext);
+  return true;
+}
+
 bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,
                                         bool Diagnose) {
   if (!getLangOpts().ObjC)
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index ff370dd1e080b2b..234e678c71b1401 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1623,6 +1623,62 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
   return Param;
 }
 
+/// Replace each PPEmbedExpr non-type argument in \p TemplateArgs, in place, by
+/// one IntegerLiteral argument per embedded element.
+void Sema::ModifyTemplateArguments(
+    const TemplateTy &Template,
+    SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs) {
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t I = 0; I < TemplateArgs.size();) {
+    const ParsedTemplateArgument &OriginalArg = TemplateArgs[I];
+    PPEmbedExpr *PPEmbed =
+        OriginalArg.getKind() == ParsedTemplateArgument::NonType
+            ? dyn_cast_if_present<PPEmbedExpr>(OriginalArg.getAsExpr())
+            : nullptr;
+    if (!PPEmbed) {
+      ++I;
+      continue;
+    }
+    // Save the location first: the erase below invalidates OriginalArg.
+    const SourceLocation ArgLoc = OriginalArg.getLocation();
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0) {
+      // No ++I: after the erase, I already names the following argument.
+      TemplateArgs.erase(TemplateArgs.begin() + I);
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Width / CHAR_BIT, not the inverse (zero for elements wider than a byte).
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    SmallVector<ParsedTemplateArgument, 4> Literals;
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0); // Start the next 64-bit word.
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      ArrayRef<uint64_t> ByteValsRef(ByteVals);
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      Literals.push_back(ParsedTemplateArgument(ParsedTemplateArgument::NonType,
+                                                IntLit, ArgLoc));
+    }
+    assert(Literals.size() == ExpectedDataElements);
+    // One erase plus one range insert shifts the list tail once per embed.
+    auto InsertPos = TemplateArgs.erase(TemplateArgs.begin() + I);
+    TemplateArgs.insert(InsertPos, Literals.begin(), Literals.end());
+    I += Literals.size();
+  }
+}
+
 /// ActOnTemplateTemplateParameter - Called when a C++ template template
 /// parameter (e.g. T in template <template \<typename> class T> class array)
 /// has been parsed. S is the current scope.
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8fafdd4f5caa1ed..ed5a03393d4adb5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12127,6 +12127,12 @@ ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
                                            getSema().CurContext);
 }
 
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformPPEmbedExpr(PPEmbedExpr *E) {
+  // TODO: fully implement for tree transformations
+  return E;
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 1bdc3fa3bea455a..9acf786cf3cc463 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1297,6 +1297,15 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) {
       static_cast<SourceLocExpr::IdentKind>(Record.readInt());
 }
 
+void ASTStmtReader::VisitPPEmbedExpr(PPEmbedExpr *E) {
+  VisitExpr(E);
+  E->ParentContext = readDeclAs<DeclContext>();
+  E->BuiltinLoc = readSourceLocation();
+  E->RParenLoc = readSourceLocation();
+  E->Filename = cast<StringLiteral>(Record.readSubStmt());
+  E->BinaryData = cast<StringLiteral>(Record.readSubStmt());
+}
+
 void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   E->setAmpAmpLoc(readSourceLocation());
@@ -3121,6 +3130,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = new (Context) SourceLocExpr(Empty);
       break;
 
+    case EXPR_BUILTIN_PP_EMBED:
+      S = new (Context) PPEmbedExpr(Empty);
+      break;
+
     case EXPR_ADDR_LABEL:
       S = new (Context) AddrLabelExpr(Empty);
       break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 125ca17c0c1212e..482daabe30f8349 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1169,6 +1169,16 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) {
   Code = serialization::EXPR_SOURCE_LOC;
 }
 
+void ASTStmtWriter::VisitPPEmbedExpr(PPEmbedExpr *E) {
+  VisitExpr(E);
+  Record.AddDeclRef(cast_or_null<Decl>(E->getParentContext()));
+  Record.AddSourceLocation(E->getBeginLoc());
+  Record.AddSourceLocation(E->getEndLoc());
+  Record.AddStmt(E->getFilenameStringLiteral());
+  Record.AddStmt(E->getDataStringLiteral());
+  Code = serialization::EXPR_BUILTIN_PP_EMBED;
+}
+
 void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   Record.AddSourceLocation(E->getAmpAmpLoc());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 451ee91b94533d5..70347fb9ffb2ca7 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2411,6 +2411,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       Bldr.addNodes(Dst);
       break;
     }
+
+    case Stmt::PPEmbedExprClass:
+      llvm_unreachable("Support for PPEmbedExpr is not implemented.");
+      break;
   }
 }
 
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 000000000000000..1639fb7af7f07b0
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data) == 274, "");
+_Static_assert(' ' == data[0], "");
+_Static_assert('_' == data[11], "");
+_Static_assert('\n' == data[273], "");
+_Static_assert(sizeof(data2) == 275, "");
+_Static_assert(' ' == data2[0], "");
+_Static_assert('_' == data2[11], "");
+_Static_assert('\n' == data2[273], "");
+_Static_assert('\0' == data2[274], "");
+_Static_assert(sizeof(data3) == 275, "");
+_Static_assert(' ' == data3[0], "");
+_Static_assert('_' == data3[11], "");
+_Static_assert('\n' == data3[273], "");
+_Static_assert('\0' == data3[274], "");
+_Static_assert(sizeof(data4) == 275, "");
+_Static_assert(' ' == data4[0], "");
+_Static_assert('_' == data4[11], "");
+_Static_assert('\n' == data4[273], "");
+_Static_assert('\0' == data4[274], "");
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data5) == 274, "");
+_Static_assert(' ' == data5[0], "");
+_Static_assert('_' == data5[11], "");
+_Static_assert('\n' == data5[273], "");
+_Static_assert(sizeof(data6) == 275, "");
+_Static_assert(' ' == data6[0], "");
+_Static_assert('_' == data6[11], "");
+_Static_assert('\n' == data6[273], "");
+_Static_assert('\0' == data6[274], "");
+_Static_assert(sizeof(data7) == 275, "");
+_Static_assert(' ' == data7[0], "");
+_Static_assert('_' == data7[11], "");
+_Static_assert('\n' == data7[273], "");
+_Static_assert('\0' == data7[274], "");
+_Static_assert(sizeof(data8) == 275, "");
+_Static_assert(' ' == data8[0], "");
+_Static_assert('_' == data8[11], "");
+_Static_assert('\n' == data8[273], "");
+_Static_assert('\0' == data8[274], "");
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+_Static_assert(sizeof(data9) == 274, "");
+_Static_assert(' ' == data9[0], "");
+_Static_assert('_' == data9[11], "");
+_Static_assert('\n' == data9[273], "");
+_Static_assert(sizeof(data10) == 275, "");
+_Static_assert(' ' == data10[1], "");
+_Static_assert('_' == data10[12], "");
+_Static_assert('\n' == data10[274], "");
+_Static_assert('\0' == data10[0], "");
+_Static_assert(sizeof(data11) == 275, "");
+_Static_assert(' ' == data11[1], "");
+_Static_assert('_' == data11[12], "");
+_Static_assert('\n' == data11[274], "");
+_Static_assert('\0' == data11[0], "");
+_Static_assert(sizeof(data12) == 275, "");
+_Static_assert(' ' == data12[1], "");
+_Static_assert('_' == data12[12], "");
+_Static_assert('\n' == data12[274], "");
+_Static_assert('\0' == data12[0], "");
+
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 000000000000000..3be4e1c2a6cf870
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data =
+#embed "single_byte.txt"
+;
+_Static_assert('a' == data[0]);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 000000000000000..5971a75ee000bbf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;}
+void g () {}
+void h (unsigned char x, int y) {(void)x; (void)y;}
+int i () {
+	return
+#embed <single_byte.txt>
+		;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus
+template <int First, int Second>
+void j() {
+	static_assert(First == 'j', "");
+	static_assert(Second == 'k', "");
+}
+#endif
+
+void do_stuff() {
+	f(
+#embed <single_byte.txt>
+	);
+	g(
+#embed <media/empty>
+	);
+	h(
+#embed <jk.txt>
+	);
+	int r = i();
+	(void)r;
+#ifdef __cplusplus
+	j<
+#embed <jk.txt>
+	>(
+#embed <media/empty>
+	);
+#endif
+}
+// expected-no-diagnostics
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 3d96884749b32f4..8fcef706e916733 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -20,37 +20,43 @@
 
 namespace llvm {
 
-template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+template <class InputBytes, class OutputContainer>
+void encodeBase64(InputBytes const &Bytes, OutputContainer &OutputBuffer) {
   static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "abcdefghijklmnopqrstuvwxyz"
                               "0123456789+/";
-  std::string Buffer;
-  Buffer.resize(((Bytes.size() + 2) / 3) * 4);
+  const std::size_t IndexOffset = OutputBuffer.size();
+  OutputBuffer.resize(OutputBuffer.size() + (((Bytes.size() + 2) / 3) * 4));
 
   size_t i = 0, j = 0;
   for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) {
     uint32_t x = ((unsigned char)Bytes[i] << 16) |
                  ((unsigned char)Bytes[i + 1] << 8) |
                  (unsigned char)Bytes[i + 2];
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = Table[(x >> 6) & 63];
-    Buffer[j + 3] = Table[x & 63];
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+    OutputBuffer[IndexOffset + j + 3] = Table[x & 63];
   }
   if (i + 1 == Bytes.size()) {
     uint32_t x = ((unsigned char)Bytes[i] << 16);
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = '=';
-    Buffer[j + 3] = '=';
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = '=';
+    OutputBuffer[IndexOffset + j + 3] = '=';
   } else if (i + 2 == Bytes.size()) {
     uint32_t x =
         ((unsigned char)Bytes[i] << 16) | ((unsigned char)Bytes[i + 1] << 8);
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = Table[(x >> 6) & 63];
-    Buffer[j + 3] = '=';
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+    OutputBuffer[IndexOffset + j + 3] = '=';
   }
+}
+
+template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+  std::string Buffer;
+  encodeBase64(Bytes, Buffer);
   return Buffer;
 }
 

>From 77aad07644b135196511dfe1d60bc08617e9d72b Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 10:31:54 -0500
Subject: [PATCH 03/50] Update based on API changes in community

---
 clang/lib/Parse/ParseExpr.cpp | 2 +-
 clang/lib/Sema/SemaExpr.cpp   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 25da32ee0b88c1d..4c96b62b3e323bd 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2918,7 +2918,7 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     } else {
       StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
       StringRef Base64StrData = Base64Str->getBytes();
-      if (Base64Str->getKind() != StringLiteral::Ordinary) {
+      if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
         Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
             << 0
             << "'__builtin_pp_embed' with valid base64 encoding that is an "
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index be1f22bc93dde7f..87626d6af05d7d0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17609,10 +17609,10 @@ ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
   llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
                                      1, ArraySizeRawVal));
   QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
-                                                  ArrayType::Normal, 0);
+                                                  ArraySizeModifier::Normal, 0);
   StringLiteral *BinaryDataLiteral = StringLiteral::Create(
       Context, StringRef(BinaryData.data(), BinaryData.size()),
-      StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation);
+      StringLiteralKind::Ordinary, false, ArrayTy, Base64DataLocation);
   return new (Context)
       PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
                   RPLoc, CurContext);

>From 1cca72573478b5572d10721e9c94f2aea2d7e394 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:14:26 -0500
Subject: [PATCH 04/50] We don't yet expose a libclang cursor for embed
 expressions

---
 clang/tools/libclang/CXCursor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index fd03c48ba1a42aa..08f5830afaa9625 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::ObjCSubscriptRefExprClass:
   case Stmt::RecoveryExprClass:
   case Stmt::SYCLUniqueStableNameExprClass:
+  case Stmt::PPEmbedExprClass:
     K = CXCursor_UnexposedExpr;
     break;
 

>From cd6142dc5899dd55ca693665ea313521db750d74 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:24 -0500
Subject: [PATCH 05/50] Update preprocessor tests for new builtin macros

---
 clang/test/Preprocessor/init-aarch64.c | 3 +++
 clang/test/Preprocessor/init.c         | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 2b7cc57f2303333..b666fa99f39b42f 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -262,6 +262,9 @@
 // AARCH64-NEXT: #define __SIZE_WIDTH__ 64
 // AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
 // AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
 // AARCH64-NEXT: #define __STDC_HOSTED__ 1
 // AARCH64-NEXT: #define __STDC_UTF_16__ 1
 // AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index a0a2879cb58c7fc..0f728a69c34e561 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1797,6 +1797,9 @@
 // WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
 // WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
 // WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
 // WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
 // WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__

>From 495f1d49d3e88c294be43e752ef699c267f67f8c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:52 -0500
Subject: [PATCH 06/50] Fix logical think-o with the test

---
 clang/test/Preprocessor/embed_single_entity.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
index 3be4e1c2a6cf870..8cbee2a93626152 100644
--- a/clang/test/Preprocessor/embed_single_entity.c
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 -embed-dir=%S/Inputs -verify
 
 const char data =
 #embed "single_byte.txt"
 ;
-_Static_assert('a' == data[0]);
+_Static_assert('b' == data);
 // expected-no-diagnostics

>From 680c3798811c3df1c8e92181a79655b24349ebce Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:05:09 -0500
Subject: [PATCH 07/50] Fix -Wreorder diagnostics; NFC

---
 clang/include/clang/Lex/PPEmbedParameters.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index 7b76d2d573c23bd..dfc835ecfc835af 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -27,7 +27,7 @@ class PPEmbedParameterOffset : public PPDirectiveParameter {
 
   PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
                          SourceLocation End)
-      : Offset(Offset), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Offset(Offset) {}
 };
 
 /// Preprocessor standard embed parameter "limit"
@@ -37,7 +37,7 @@ class PPEmbedParameterLimit : public PPDirectiveParameter {
   size_t Limit;
 
   PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
-      : Limit(Limit), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Limit(Limit) {}
 };
 
 /// Preprocessor standard embed parameter "prefix"
@@ -48,7 +48,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
 
   PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
                          SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 /// Preprocessor standard embed parameter "suffix"
@@ -59,7 +59,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
 
   PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
                          SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 /// Preprocessor standard embed parameter "if_empty"
@@ -70,7 +70,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
 
   PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
                           SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 } // end namespace clang

>From a0f8278db25809e3fc397edaac909ef809931567 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:15:37 -0500
Subject: [PATCH 08/50] Clean up these constructors to take a SmallVectorImpl

This way we're not tied to a SmallVector<Token, 2> specifically in callers.
---
 clang/include/clang/Lex/PPEmbedParameters.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index dfc835ecfc835af..f6de84bdc915148 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -46,7 +46,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterPrefix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                          SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
@@ -57,7 +57,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterSuffix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                          SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
@@ -68,7 +68,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterIfEmpty(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                           SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };

>From 4d9ed9e2f4bd27013681461edda6768ebbb7aaa1 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:13:32 -0500
Subject: [PATCH 09/50] Fix a crash with argument parsing

If the user passes -fno-builtin, then the call to getValue() will
assert due to an out-of-bounds access. So we check to see which form
the user passed (-fno-builtin or -fno-builtin-pp_embed).

Additionally, we need to round trip the argument properly depending on
which form the user passed.
---
 clang/lib/Frontend/CompilerInvocation.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index ce1341421bab694..6660a116dad8022 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4330,8 +4330,14 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   for (const auto &EmbedEntry : Opts.EmbedEntries)
     GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
 
-  if (Opts.NoBuiltinPPEmbed)
-    GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+  if (Opts.NoBuiltinPPEmbed) {
+    // We need to figure out whether the user passed -fno-builtins or
+    // specifically disabled pp_embed. If NoBuiltin is true, we don't need to
+    // generate an arg because that disables everything. Otherwise, we assume
+    // the user passed -fno-builtin-pp_embed and generate that.
+    if (!LangOpts.NoBuiltin)
+      GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+  }
 
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
@@ -4432,10 +4438,12 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
 
   // Can disable the internal embed builtin / token
   for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
-    StringRef Val = A->getValue();
-    if (Val == "pp_embed") {
-      Opts.NoBuiltinPPEmbed = true;
-    }
+    bool NoBuiltinEmbed = false;
+    if (A->getNumValues())
+      NoBuiltinEmbed = A->getValue() == StringRef("pp_embed");
+    else
+      NoBuiltinEmbed = true; // All builtins are disabled.
+    Opts.NoBuiltinPPEmbed = NoBuiltinEmbed;
   }
 
   // Always avoid lexing editor placeholders when we're just running the

>From 8a466f3354cbf862a3bc1edd71c32289f337ebb0 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:55:36 -0500
Subject: [PATCH 10/50] Back out unrelated CMake changes

---
 clang/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 1b88905da3b8597..9b52c58be41e7f7 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,7 +300,6 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
 
 # Add appropriate flags for GCC
-option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
   if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -308,7 +307,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   endif ()
 
   # Enable -pedantic for Clang even if it's not enabled for LLVM.
-  if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
+  if (NOT LLVM_ENABLE_PEDANTIC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
   endif ()
 

>From a3d4b13f9dbd9d11bbd8f619de3ac888a880bf82 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:57:50 -0500
Subject: [PATCH 11/50] Remove a spurious #undef; NFC

---
 clang/include/clang/Basic/TokenKinds.def | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 6b726463f0cdd31..613f6d64eb8bdc9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -991,7 +991,6 @@ ANNOTATION(repl_input_end)
 #undef CXX11_KEYWORD
 #undef KEYWORD
 #undef PUNCTUATOR
-#undef BUILTINOK
 #undef TOK
 #undef C99_KEYWORD
 #undef C23_KEYWORD

>From 7dad1be74cc40cbb1694d58e8f7553c8741634ec Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 15:12:17 -0500
Subject: [PATCH 12/50] Backing out more unnecessary CMake changes

---
 llvm/CMakeLists.txt                    | 7 -------
 llvm/cmake/modules/GetHostTriple.cmake | 6 +++---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cb049ccb7d9c8cd..7ff3acd48304de7 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -780,13 +780,6 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
     "Semicolon-separated list of components to include in libLLVM, or \"all\".")
 endif()
 
-option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON)
-# Quiet down MSVC-style secure CRT warnings
-if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
-  add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
-endif()
-
-
 if(MSVC)
   option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
   # Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 828227f2f25a2f0..1be13bc01ab9b25 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
 # Invokes config.guess
 
 function( get_host_triple var )
-  if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
+  if( MSVC )
     if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
       set( value "aarch64-pc-windows-msvc" )
     elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
     else()
       set( value "powerpc-ibm-aix" )
     endif()
-  else()
+  else( MSVC )
     if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
       message(WARNING "unable to determine host target triple")
     else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
       endif( NOT TT_RV EQUAL 0 )
       set( value ${TT_OUT} )
     endif()
-  endif()
+  endif( MSVC )
   set( ${var} ${value} PARENT_SCOPE )
 endfunction( get_host_triple var )

>From 29ac376978331a6453575004814cb8e9364bd933 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:16:21 -0500
Subject: [PATCH 13/50] Correct the logic for this diagnostic checking function

This fixes a few hundred failing test cases for me; several are still
failing, though.
---
 clang/lib/Sema/SemaExpr.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 87626d6af05d7d0..c932abf8d931906 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17801,11 +17801,11 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
                                bool SingleAllowed) {
   PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
   if (!PPEmbed)
-    return true;
+    return false;
 
   if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
     E = ExpandSinglePPEmbedExpr(PPEmbed);
-    return true;
+    return false;
   }
 
   StringRef LocationName = GetLocationName(PPEmbedContext);
@@ -17816,7 +17816,7 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
                      : "cannot use a preprocessor embed in ");
   Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
       << DiagnosticMessage << 1 << LocationName;
-  return false;
+  return true;
 }
 
 bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,

>From e4e28eb990098d8a203013d946dd5a4243a8fb0f Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:38:23 -0500
Subject: [PATCH 14/50] Fix think-o with test to get it to pass

---
 clang/test/Preprocessor/embed_path_quote.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index 791cd9176ebe0ab..7e39d9be3b0a523 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -4,5 +4,5 @@ const char data[] = {
 #embed "single_byte.txt"
 };
 _Static_assert(sizeof(data) == 1, "");
-_Static_assert('a' == data[0], "");
+_Static_assert('b' == data[0], "");
 // expected-no-diagnostics

>From ab5f8c204d03bab9bd516c299a478b0d72467b01 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 08:50:21 -0500
Subject: [PATCH 15/50] Restore previous behavior; fixes two more failing test
 cases

---
 clang/lib/Lex/PPExpressions.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index dda5717afc699da..e0bd73e8680921c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -935,10 +935,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    const bool IsNonZero = ResVal.Val != 0;
     const SourceRange ValRange = ResVal.getRange();
-    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
-            ValRange};
+    return {std::move(ResVal.Val), false, DT.IncludedUndefinedIds, ValRange};
   }
 
   if (CheckForEoD) {

>From 9d5eadfc04ed7276bab79321294b6bff4f35bb85 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:37:59 -0500
Subject: [PATCH 16/50] Clean up the way we expose the __STDC_EMBED_*__ macros;
 NFC

---
 clang/include/clang/Lex/Preprocessor.h  | 11 +++++----
 clang/lib/Frontend/InitPreprocessor.cpp |  9 +++++---
 clang/lib/Lex/PPMacroExpansion.cpp      | 30 ++++++++++++-------------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index ea461aba0611f0f..8db920ad2dc6610 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -122,6 +122,12 @@ enum MacroUse {
   MU_Undef  = 2
 };
 
+enum class EmbedResult {
+  NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
+  Found = 1,    // Corresponds to __STDC_EMBED_FOUND__
+  Empty = 2,    // Corresponds to __STDC_EMBED_EMPTY__
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -211,9 +217,6 @@ class Preprocessor {
   enum {
     /// Maximum depth of \#includes.
     MaxAllowedIncludeStackDepth = 200,
-    VALUE__STDC_EMBED_NOT_FOUND__ = 0,
-    VALUE__STDC_EMBED_FOUND__ = 1,
-    VALUE__STDC_EMBED_EMPTY__ = 2,
   };
 
   // State that is set before the preprocessor begins.
@@ -2584,7 +2587,7 @@ class Preprocessor {
   ///
   /// Returns predefined `__STDC_EMBED_*` macro values if
   /// successful.
-  int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
+  EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
 
   /// Process a '__has_include("path")' expression.
   ///
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b7d084773b0a195..cc9c6733f442968 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -499,9 +499,12 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__STDC_UTF_32__", "1");
 
   // __has_embed definitions
-  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
-  Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
-  Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+  Builder.defineMacro("__STDC_EMBED_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Found)));
+  Builder.defineMacro("__STDC_EMBED_EMPTY__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Empty)));
 
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b25faf8c873d389..a55bc719328ad62 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1272,7 +1272,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
 
 /// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
 /// Returns a filled optional with the value if successful; otherwise, empty.
-int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   // pedwarn for not being on C23
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
     auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
@@ -1290,13 +1290,13 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     // Return a valid identifier token.
     assert(Tok.is(tok::identifier));
     Tok.setIdentifierInfo(II);
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   // Get '('. If we don't have a '(', try to form a header-name token.
   do {
     if (this->LexHeaderName(Tok)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   } while (Tok.getKind() == tok::comment);
 
@@ -1308,19 +1308,19 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     // If the next token looks like a filename or the start of one,
     // assume it is and process it as such.
     if (Tok.isNot(tok::header_name)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   } else {
     // Save '(' location for possible missing ')' message.
     LParenLoc = Tok.getLocation();
     if (this->LexHeaderName(Tok)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   }
 
   if (Tok.isNot(tok::header_name)) {
     Diag(Tok.getLocation(), diag::err_pp_expects_filename);
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   SourceLocation FilenameLoc = Tok.getLocation();
@@ -1331,10 +1331,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   if (!Params.Successful) {
     if (Tok.isNot(tok::eod))
       this->DiscardUntilEndOfDirective();
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
   if (Params.UnrecognizedParams > 0) {
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   if (!Tok.is(tok::r_paren)) {
@@ -1342,7 +1342,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
         << II << tok::r_paren;
     Diag(LParenLoc, diag::note_matching) << tok::l_paren;
     DiscardUntilEndOfDirective();
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   SmallString<128> FilenameBuffer;
@@ -1364,7 +1364,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
   }
   if (!MaybeFileEntry) {
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
   size_t FileSize = MaybeFileEntry->getSize();
   if (Params.MaybeLimitParam) {
@@ -1373,12 +1373,12 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     }
   }
   if (FileSize == 0) {
-    return VALUE__STDC_EMBED_EMPTY__;
+    return EmbedResult::Empty;
   }
   if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
-    return VALUE__STDC_EMBED_EMPTY__;
+    return EmbedResult::Empty;
   }
-  return VALUE__STDC_EMBED_FOUND__;
+  return EmbedResult::Found;
 }
 
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
@@ -1923,11 +1923,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     // file name string literal using angle brackets (<>) or
     // double-quotes (""), optionally followed by a series of
     // arguments similar to form like attributes.
-    int Value = EvaluateHasEmbed(Tok, II);
+    EmbedResult Value = EvaluateHasEmbed(Tok, II);
 
     if (Tok.isNot(tok::r_paren))
       return;
-    OS << Value;
+    OS << static_cast<int>(Value);
     Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.

>From f88a1aec9865fdd3cb44aaa45d4d141a6195854e Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:51:08 -0500
Subject: [PATCH 17/50] Fix a broken pp-trace test

The test needs to care about the three new predefined macros.
---
 clang-tools-extra/test/pp-trace/pp-trace-macro.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
index 1d85607e86b7fff..7c2a231101070d7 100644
--- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
+++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
@@ -31,6 +31,15 @@ X
 // CHECK:        MacroNameTok: __STDC_UTF_32__
 // CHECK-NEXT:   MacroDirective: MD_Define
 // CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_NOT_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_EMPTY__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
 // CHECK:      - Callback: MacroDefined
 // CHECK-NEXT:   MacroNameTok: MACRO
 // CHECK-NEXT:   MacroDirective: MD_Define

>From e7ef292e0e61591eaf3bda238265f45a3e468e48 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 11:56:35 -0500
Subject: [PATCH 18/50] Remove __builtin_pp_embed as a builtin function; NFC

This is a weird builtin function that's more like __builtin_offsetof
in that it takes a type argument. Therefore, it's not really a function
call like other builtins (we wouldn't check its validity in
SemaChecking.cpp).
---
 clang/include/clang/Basic/Builtins.def | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index fa3d83d1a34bec0..ec39e926889b936 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1770,9 +1770,6 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
 // Arithmetic Fence: to prevent FP reordering and reassociation optimizations
 LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
 
-// preprocessor embed builtin
-LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
-
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN

>From 7c6bc7b776be54f7dca27ce34222c9ca7b1beda4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:00:02 -0500
Subject: [PATCH 19/50] Add a test for feature testing the builtin

---
 clang/test/Preprocessor/embed_builtin.cpp | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 clang/test/Preprocessor/embed_builtin.cpp

diff --git a/clang/test/Preprocessor/embed_builtin.cpp b/clang/test/Preprocessor/embed_builtin.cpp
new file mode 100644
index 000000000000000..d2547fa0c3f668d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_builtin.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if !__has_builtin(__builtin_pp_embed)
+#error "Don't have __builtin_pp_embed?"
+#endif

>From 038c90d4e9dc2c17900064b7e059061165b6d993 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:22:00 -0500
Subject: [PATCH 20/50] Correct parsing behavior and add tests

There is likely more work to be done here to split parsing and semantic
concerns. This also pointed out an issue where __builtin_pp_embed seems
to have a non-void return type, but who knows what it actually returns
as a value.
---
 .../clang/Basic/DiagnosticCommonKinds.td      |   2 +-
 clang/lib/Parse/ParseExpr.cpp                 | 118 +++++++++---------
 clang/test/Parser/embed_builtin.cpp           |  14 +++
 3 files changed, 75 insertions(+), 59 deletions(-)
 create mode 100644 clang/test/Parser/embed_builtin.cpp

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index b2e770b540944e3..6368f0ceeac3274 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -57,7 +57,7 @@ def err_expected_string_literal : Error<"expected string literal "
           "for optional message in 'availability' attribute|"
           "for %select{language name|source container name|USR}1 in "
           "'external_source_symbol' attribute|"
-          "as argument of '%1' attribute}0">;
+          "as argument of '%1' attribute|as the %ordinal1 argument}0">;
 
 def err_builtin_pp_embed_invalid_argument : Error<
   "invalid argument to '__builtin_pp_embed': %0">;
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 4c96b62b3e323bd..03d181586f83b5a 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2858,86 +2858,88 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     break;
   }
   case tok::kw___builtin_pp_embed: {
-    SourceRange DataTyExprSourceRange{};
+    // __builtin_pp_embed( type-name , string-literal , string-literal )
+    SourceRange DataTyExprSourceRange;
     TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
 
+    if (DataTyExpr.isInvalid()) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+
     if (ExpectAndConsume(tok::comma)) {
       SkipUntil(tok::r_paren, StopAtSemi);
-      Res = ExprError();
+      return ExprError();
     }
 
-    ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+      Diag(Tok, diag::err_expected_string_literal)
+          << /*as argument*/ 5 << /*second argument*/ 2;
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+    ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
 
-    if (ExpectAndConsume(tok::comma)) {
+    if (FilenameArgExpr.isInvalid() || ExpectAndConsume(tok::comma)) {
       SkipUntil(tok::r_paren, StopAtSemi);
-      Res = ExprError();
+      return ExprError();
     }
 
-    ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+      Diag(Tok, diag::err_expected_string_literal)
+          << /*as argument*/ 5 << /*third argument*/ 3;
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+    ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
 
-    if (Tok.isNot(tok::r_paren)) {
+    if (Base64ArgExpr.isInvalid() || Tok.isNot(tok::r_paren)) {
       Diag(Tok, diag::err_expected) << tok::r_paren;
-      Res = ExprError();
+      return ExprError();
     }
 
     const ASTContext &Context = Actions.getASTContext();
-    QualType DataTy = Context.UnsignedCharTy;
+    QualType DataTy = DataTyExpr.get().get().getCanonicalType();
     size_t TargetWidth = Context.getTypeSize(DataTy);
-    if (DataTyExpr.isInvalid()) {
+    if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+        DataTy.getUnqualifiedType() != Context.CharTy) {
+      // TODO: check if is exactly the same as unsigned char
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "only 'char' and 'unsigned char' are supported";
       Res = ExprError();
-    } else {
-      DataTy = DataTyExpr.get().get().getCanonicalType();
-      TargetWidth = Context.getTypeSize(DataTy);
-      if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
-          DataTy.getUnqualifiedType() != Context.CharTy) {
-        // TODO: check if is exactly the same as unsigned char
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "only 'char' and 'unsigned char' are supported";
-        Res = ExprError();
-      }
-      if ((TargetWidth % CHAR_BIT) != 0) {
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "width of element type is not a multiple of host platform's "
-               "CHAR_BIT!";
-        Res = ExprError();
-      }
     }
-
-    StringLiteral *FilenameLiteral = nullptr;
-    if (FilenameArgExpr.isInvalid()) {
+    if ((TargetWidth % CHAR_BIT) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "width of element type is not a multiple of host platform's "
+              "CHAR_BIT!";
       Res = ExprError();
-    } else {
-      FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
     }
 
-    std::vector<char> BinaryData{};
-    if (Base64ArgExpr.isInvalid()) {
+    StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+    std::vector<char> BinaryData;
+    StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+    StringRef Base64StrData = Base64Str->getBytes();
+    if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
+      Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+          << 0
+          << "'__builtin_pp_embed' with valid base64 encoding that is an "
+              "ordinary \"...\" string";
+    }
+    const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+      Diag(Base64Str->getExprLoc(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "expected a valid base64 encoded string";
+    };
+    llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+    llvm::handleAllErrors(std::move(Err), OnDecodeError);
+    if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "size of data does not split evently into the number of bytes "
+              "requested";
       Res = ExprError();
-    } else {
-      StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
-      StringRef Base64StrData = Base64Str->getBytes();
-      if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
-        Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
-            << 0
-            << "'__builtin_pp_embed' with valid base64 encoding that is an "
-               "ordinary \"...\" string";
-      }
-      const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
-        Diag(Base64Str->getExprLoc(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "expected a valid base64 encoded string";
-      };
-      llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
-      llvm::handleAllErrors(std::move(Err), OnDecodeError);
-      if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "size of data does not split evently into the number of bytes "
-               "requested";
-        Res = ExprError();
-      }
     }
 
     if (!Res.isInvalid()) {
diff --git a/clang/test/Parser/embed_builtin.cpp b/clang/test/Parser/embed_builtin.cpp
new file mode 100644
index 000000000000000..487c11c393ad0ee
--- /dev/null
+++ b/clang/test/Parser/embed_builtin.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+void parsing_diags() {
+  __builtin_pp_embed;                   // expected-error {{expected '(' after '__builtin_pp_embed'}}
+  __builtin_pp_embed(;                  // expected-error {{expected a type}}
+  __builtin_pp_embed();                 // expected-error {{expected a type}}
+  __builtin_pp_embed(12);               // expected-error {{expected a type}}
+  __builtin_pp_embed(int);              // expected-error {{expected ','}}
+  __builtin_pp_embed(int, 12);          // expected-error {{expected string literal as the 2nd argument}}
+  __builtin_pp_embed(int, "", 12);      // expected-error {{expected string literal as the 3rd argument}}
+  __builtin_pp_embed(int, "", "", 12);  // expected-error {{expected ')'}}
+  (void)__builtin_pp_embed(char, L"", "");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+  (void)__builtin_pp_embed(char, "", L"");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+}

>From c204b7358f2fcd495d495831ea71baa67f693711 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:40:23 -0500
Subject: [PATCH 21/50] No longer expose the embed driver options to Flang

The options don't make sense outside of Clang currently.
---
 clang/include/clang/Driver/Options.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index aef200cc5729279..91c6ff70cad7236 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -832,11 +832,11 @@ def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
 def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
     Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    Visibility<[ClangOption, CC1Option]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
     Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    Visibility<[ClangOption, CC1Option]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;

>From ec01bec24f4c71f3bd50ae717490db628cd1dde8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 07:32:29 -0500
Subject: [PATCH 22/50] Fix type mismatch that was upsetting the precommit CI
 bot

---
 clang/lib/Lex/PPDirectives.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 2902d5da7bc5cf5..9d5d6dcdb7a8c2a 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1386,7 +1386,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleEmbedDirective(SavedHash.getLocation(), Result,
                                   getCurrentFileLexer()
                                       ? getCurrentFileLexer()->getFileEntry()
-                                      : nullptr);
+                                      : static_cast<FileEntry *>(nullptr));
     case tok::pp_assert:
       //isExtension = true;  // FIXME: implement #assert
       break;

>From f57334a078a20da3da4e327dbceb3dc83ad3a2fc Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:11:00 -0500
Subject: [PATCH 23/50] Fix misuse of Twine and add a test

The issue would previously manifest in -E output where we would print:
  1>
instead of:
  <built-in:embed:1>
---
 clang/lib/Lex/PPDirectives.cpp                 | 18 ++++++------------
 .../Preprocessor/embed_preprocess_to_file.c    | 13 +++++++++++++
 2 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Preprocessor/embed_preprocess_to_file.c

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9d5d6dcdb7a8c2a..695fca9f5157aaa 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3899,11 +3899,6 @@ void Preprocessor::HandleEmbedDirectiveNaive(
   // particular.
   EmbedBuffers.push_back("");
   size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
-  llvm::Twine EmbedBufferName = [](const std::string &Number) {
-    llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
-    return PrefixNumber.concat(">");
-  }(EmbedBufferNumberVal);
   std::string &TargetEmbedBuffer = EmbedBuffers.back();
   const size_t TotalSize = BinaryContents.size();
   // In the future, this might change/improve.
@@ -3956,7 +3951,9 @@ void Preprocessor::HandleEmbedDirectiveNaive(
 
   // Create faux-file and its ID, backed by a memory buffer.
   std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+      llvm::MemoryBuffer::getMemBufferCopy(
+          TargetEmbedBuffer,
+          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
   assert(EmbedMemBuffer && "Cannot create predefined source buffer");
   FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
   assert(EmbedBufferFID.isValid() &&
@@ -4113,11 +4110,6 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
   // particular.
   EmbedBuffers.push_back("");
   size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
-  llvm::Twine EmbedBufferName = [](const std::string &Number) {
-    llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
-    return PrefixNumber.concat(">");
-  }(EmbedBufferNumberVal);
   std::string &TargetEmbedBuffer = EmbedBuffers.back();
   StringRef TypeName = "unsigned char";
   const size_t TotalSize =
@@ -4147,7 +4139,9 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
   TargetEmbedBuffer.append("\")");
   // Create faux-file and its ID, backed by a memory buffer.
   std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+      llvm::MemoryBuffer::getMemBufferCopy(
+          TargetEmbedBuffer,
+          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
   assert(EmbedMemBuffer && "Cannot create predefined source buffer");
   FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
   assert(EmbedBufferFID.isValid() &&
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 000000000000000..96447d4d6b11f7e
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -std=c23 -E -embed-dir=%S/Inputs | FileCheck %s
+
+// Ensure that we print out the correct data to the preprocessed file. Note,
+// #embed will do a base64 encoding of the file contents, so if art.txt changes,
+// this test will need to change accordingly as well.
+const char data[] = {
+#embed <media/art.txt>
+};
+
+// CHECK: # 1 "<built-in:embed:1>" 1
+// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
+// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
+};

>From 8ef8da333a4c1ddfea85e745546b06849d266228 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:14:00 -0500
Subject: [PATCH 24/50] Remove unused variable; NFC

---
 clang/lib/Parse/ParseExpr.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 03d181586f83b5a..982520a30a35407 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2920,7 +2920,6 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
     std::vector<char> BinaryData;
     StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
-    StringRef Base64StrData = Base64Str->getBytes();
     if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
       Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
           << 0

>From a5517cbb33fc825dbdea037df8967971fe63727d Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:16:25 -0500
Subject: [PATCH 25/50] Fix a typo that snuck into this test

---
 clang/test/Preprocessor/embed_preprocess_to_file.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 96447d4d6b11f7e..1706ac457e9224c 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -10,4 +10,3 @@ const char data[] = {
 // CHECK: # 1 "<built-in:embed:1>" 1
 // CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
 // CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
-};

>From 7f856ddef8f43b8c81d02ec2cee3c67e9a0420db Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Fri, 10 Nov 2023 09:01:09 -0500
Subject: [PATCH 26/50] Fix another type mismatch that was upsetting the
 precommit CI bot

---
 clang/lib/Lex/PPMacroExpansion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index a55bc719328ad62..3ff3055a17c4e90 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1356,7 +1356,7 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   assert(!Filename.empty());
   const FileEntry *LookupFromFile =
       this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
-                                  : nullptr;
+                                  : static_cast<FileEntry *>(nullptr);
   OptionalFileEntryRef MaybeFileEntry =
       this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
                             LookupFromFile, nullptr, &RelativePath);

>From b8a57c23e5d36508c4c2b518d7f2fb555d7928fb Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 08:49:18 -0500
Subject: [PATCH 27/50] Fix new compile error from rebase; NFC

---
 clang/lib/Frontend/DependencyFile.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 0fd10b2a177a0e3..b46a16282f22c9c 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -72,7 +72,6 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                       /*FromModule*/ false,
                                       /*IsSystem*/ false,
                                       /*IsModuleFile*/ false,
-                                      &PP.getFileManager(),
                                       /*IsMissing*/ true);
     // Files that actually exist are handled by FileChanged.
   }
@@ -100,7 +99,6 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
     DepCollector.maybeAddDependency(Filename,
                                     /*FromModule=*/false, false,
                                     /*IsModuleFile=*/false,
-                                    &PP.getFileManager(),
                                     /*IsMissing=*/false);
   }
 

>From 6a6f813099b673660295f09d674fa02bf2efaca4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 13:19:37 -0500
Subject: [PATCH 28/50] Replace __builtin_pp_embed with annotation tokens

We do not want to have a builtin for embed because it poses too many
problems. For example, it allows for recursive embeds through:

  __builtin_pp_embed(
    #embed "file containing a file name.txt"
    ,
    #embed "file containing base64 data.txt"
  )

Instead, we'll use annotation tokens to pass information from the
preprocessor into the parser.
---
 clang/include/clang/AST/Expr.h                |   2 +-
 clang/include/clang/Basic/TokenKinds.def      |   5 +-
 clang/include/clang/Lex/PreprocessorOptions.h |   4 -
 clang/include/clang/Sema/Sema.h               |   2 +-
 clang/lib/AST/StmtPrinter.cpp                 |   7 +-
 clang/lib/Frontend/CompilerInvocation.cpp     |  19 --
 clang/lib/Lex/PPDirectives.cpp                | 127 +++++++-------
 clang/lib/Parse/ParseExpr.cpp                 | 164 ++++++++----------
 clang/lib/Sema/SemaDecl.cpp                   |   3 +-
 clang/test/Parser/embed_builtin.cpp           |  14 --
 clang/test/Preprocessor/embed_builtin.cpp     |   6 -
 .../Preprocessor/embed_preprocess_to_file.c   |   4 +-
 12 files changed, 151 insertions(+), 206 deletions(-)
 delete mode 100644 clang/test/Parser/embed_builtin.cpp
 delete mode 100644 clang/test/Preprocessor/embed_builtin.cpp

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index f6d1fdd5bea2c72..6345faefa62ff26 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4810,7 +4810,7 @@ class SourceLocExpr final : public Expr {
   friend class ASTStmtReader;
 };
 
-/// Represents a function call to __builtin_pp_embed().
+/// Represents a #embed "expression".
 class PPEmbedExpr final : public Expr {
   SourceLocation BuiltinLoc, RParenLoc;
   DeclContext *ParentContext;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index cd5f2016c002d4e..41f308b23ecbb6e 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -756,7 +756,6 @@ ALIAS("__char32_t"   , char32_t          , KEYCXX)
 KEYWORD(__builtin_bit_cast               , KEYALL)
 KEYWORD(__builtin_available              , KEYALL)
 KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
-KEYWORD(__builtin_pp_embed               , KEYALL)
 
 // Keywords defined by Attr.td.
 #ifndef KEYWORD_ATTRIBUTE
@@ -973,6 +972,10 @@ ANNOTATION(header_unit)
 // Annotation for end of input in clang-repl.
 ANNOTATION(repl_input_end)
 
+// Annotation for #embed
+ANNOTATION(embed_start)
+ANNOTATION(embed_end)
+
 #undef PRAGMA_ANNOTATION
 #undef ANNOTATION
 #undef TESTING_KEYWORD
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 23f3458d79e0312..d0dac8c23ab0dea 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -170,10 +170,6 @@ class PreprocessorOptions {
   /// User specified embed entries.
   std::vector<std::string> EmbedEntries;
 
-  /// Whether or not naive expansion should be used all the time for
-  /// builtin embed
-  bool NoBuiltinPPEmbed = false;
-
   /// Whether the compiler instance should retain (i.e., not free)
   /// the buffers associated with remapped files.
   ///
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 967f40a3e2de901..48eac54f36f676e 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6089,7 +6089,7 @@ class Sema final {
                                 SourceLocation BuiltinLoc,
                                 SourceLocation RPLoc);
 
-  // __builtin_pp_embed()
+  // #embed
   ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
                               SourceLocation Base64DataLocation,
                               SourceLocation RPLoc, StringLiteral *Filename,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 426de0696965ac0..fa429ed7c7a0493 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1147,9 +1147,10 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
 }
 
 void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) {
-  OS << "__builtin_pp_embed(" << Node->getType() << ", "
-     << Node->getFilenameStringLiteral()->getBytes() << ", \""
-     << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")";
+  // This isn't yet implemented because the contents of the PPEmbedExpr are
+  // not generally retained in the AST. e.g., when used as an initializer, the
+  // expression will be converted into an InitListExpr, etc.
+  assert(false && "not yet implemented");
 }
 
 void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b7ed4bde112e328..25e73f805246b7a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4329,15 +4329,6 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   for (const auto &EmbedEntry : Opts.EmbedEntries)
     GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
 
-  if (Opts.NoBuiltinPPEmbed) {
-    // We need to figure out whether the user passed -fno-builtins or
-    // specifically disabled pp_embed. If NoBuiltin is true, we don't need to
-    // generate an arg because that disables everything. Otherwise, we assume
-    // the user passed -fno-builtin-pp_embed and generate that.
-    if (!LangOpts.NoBuiltin)
-      GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
-  }
-
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
 }
@@ -4435,16 +4426,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     Opts.EmbedEntries.push_back(std::string(Val));
   }
 
-  // Can disable the internal embed builtin / token
-  for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
-    bool NoBuiltinEmbed = false;
-    if (A->getNumValues())
-      NoBuiltinEmbed = A->getValue() == StringRef("pp_embed");
-    else
-      NoBuiltinEmbed = true; // All builtins are disabled.
-    Opts.NoBuiltinPPEmbed = NoBuiltinEmbed;
-  }
-
   // Always avoid lexing editor placeholders when we're just running the
   // preprocessor as we never want to emit the
   // "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 695fca9f5157aaa..78269021484824d 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -4070,19 +4070,41 @@ static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
   }
 }
 
-void Preprocessor::HandleEmbedDirectiveBuiltin(
+void Preprocessor::HandleEmbedDirectiveImpl(
     SourceLocation HashLoc, const Token &FilenameTok,
     StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
     const LexEmbedParametersResult &Params, StringRef BinaryContents,
     const size_t TargetCharWidth) {
-  // if it's empty, just process it like a normal expanded token stream
+  // Pass off the annotation token stream. The parser expects:
+  //   if_empty-tokens or
+  //   embed-annotation-start
+  //     type-name string-literal , string-literal
+  //   embed-annotation-stop
+  // where the type-name is the type used for each element to embed, the first
+  // string-literal is the resolved file name of the file we loaded contents
+  // from, and the second string-literal is the base64 encoded data we loaded
+  // from the file. The comma separation between string-literals prevents the
+  // literals from combining into a single string literal.
+  auto EmitToks = [&](ArrayRef<Token> Toks) {
+    size_t TokCount = Toks.size();
+    auto NewToks = std::make_unique<Token[]>(TokCount);
+    llvm::copy(Toks, NewToks.get());
+    EnterTokenStream(std::move(NewToks), TokCount, true, true);
+  };
   if (BinaryContents.empty()) {
-    HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
-                              BinaryContents, TargetCharWidth);
+    // If we have no binary contents, the only thing we need to emit are the
+    // if_empty tokens, if any.
+    // FIXME: this loses AST fidelity; nothing in the compiler will see that
+    // these tokens came from #embed.
+    if (Params.MaybeIfEmptyParam)
+      EmitToks(Params.MaybeIfEmptyParam->Tokens);
     return;
   }
-  SmallVector<char, 2> BinaryPrefix{};
-  SmallVector<char, 2> BinarySuffix{};
+
+  // FIXME: this is not correct; the standard allows *arbitrary* tokens in the
+  // prefix and suffix, but this only accounts for numeric literals and commas
+  // and nothing else.
+  SmallVector<char, 2> BinaryPrefix, BinarySuffix;
   if (Params.MaybePrefixParam) {
     // If we ahve a prefix, validate that it's a good fit for direct data
     // embedded (and prepare to prepend it)
@@ -4095,7 +4117,7 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
     }
   }
   if (Params.MaybeSuffixParam) {
-    // If we ahve a prefix, validate that it's a good fit for direct data
+    // If we have a suffix, validate that it's a good fit for direct data
     // embedding (and prepare to append it)
     const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
     if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
@@ -4106,50 +4128,43 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
     }
   }
 
-  // Load up a new embed buffer for this file and set of parameters in
-  // particular.
-  EmbedBuffers.push_back("");
-  size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string &TargetEmbedBuffer = EmbedBuffers.back();
-  StringRef TypeName = "unsigned char";
-  const size_t TotalSize =
-      BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size();
-  const size_t ReserveSize =        // add up for necessary size:
-      19                            // __builtin_pp_embed(
-      + TypeName.size()             // type-name
-      + 2                           // ,"
-      + ResolvedFilename.size()     // file-name
-      + 3                           // ","
-      + (((TotalSize + 2) / 3) * 4) // base64-string
-      + 2                           // ");
-      ;
-  // Reserve appropriate size
-  TargetEmbedBuffer.reserve(ReserveSize);
+  // Now emit the tokens for the embedded content itself.
+  std::string EncodedContents = llvm::encodeBase64(
+      (Twine(BinaryPrefix) + BinaryContents + Twine(BinarySuffix)).str());
+  auto SetAnnotTok = [](Token &Tok, tok::TokenKind Kind, SourceLocation Loc) {
+    Tok.startToken();
+    Tok.setKind(Kind);
+    Tok.setAnnotationRange(Loc);
+  };
+  auto SetStrTok = [&](Token &Tok, StringRef Contents, SourceLocation Loc) {
+    Tok.startToken();
+    Tok.setKind(tok::string_literal);
+    CreateString(("\"" + Contents + "\"").str(), Tok, Loc, Loc);
+  };
+  constexpr size_t TotalNumToks = 7;
+  auto Toks = std::make_unique<Token[]>(TotalNumToks);
 
-  // Generate the look-alike source file
-  TargetEmbedBuffer.append("__builtin_pp_embed(");
-  TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
-  TargetEmbedBuffer.append(",\"");
-  TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size());
-  TargetEmbedBuffer.append("\",\"");
-  // include the prefix(...) and suffix(...) binary data in the total contents
-  TripleEncodeBase64(
-      StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents,
-      StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer);
-  TargetEmbedBuffer.append("\")");
-  // Create faux-file and its ID, backed by a memory buffer.
-  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(
-          TargetEmbedBuffer,
-          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
-  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
-  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
-  assert(EmbedBufferFID.isValid() &&
-         "Could not create FileID for #embed directive?");
-  // Start parsing the look-alike source file for the embed directive and
-  // pretend everything is normal
-  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
-  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
+  SetAnnotTok(Toks[0], tok::annot_embed_start, HashLoc);
+
+  Toks[1].startToken();
+  Toks[1].setLocation(HashLoc);
+  Toks[1].setKind(tok::kw_unsigned);
+
+  Toks[2].startToken();
+  Toks[2].setLocation(HashLoc);
+  Toks[2].setKind(tok::kw_char);
+
+  SetStrTok(Toks[3], ResolvedFilename, HashLoc);
+
+  Toks[4].startToken();
+  Toks[4].setLocation(HashLoc);
+  Toks[4].setKind(tok::comma);
+
+  SetStrTok(Toks[5], EncodedContents, HashLoc);
+
+  SetAnnotTok(Toks[6], tok::annot_embed_end, HashLoc);
+
+  EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
 }
 
 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
@@ -4257,13 +4272,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                               ParametersRange, MaybeFileRef, SearchPath,
                               RelativePath);
   }
-  if (PPOpts->NoBuiltinPPEmbed) {
-    HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents,
-                              TargetCharWidth);
-  } else {
-    // emit a token directly, handle it internally.
-    HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath,
-                                RelativePath, Params, BinaryContents,
-                                TargetCharWidth);
-  }
+  HandleEmbedDirectiveImpl(HashLoc, FilenameTok, Filename, SearchPath,
+                           RelativePath, Params, BinaryContents,
+                           TargetCharWidth);
 }
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 96bc542effde018..ba4aa59505d3199 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -808,7 +808,6 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
-/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [GNU]   '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
 /// [GNU]   '__null'
@@ -1054,6 +1053,76 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
     break;
   }
 
+  case tok::annot_embed_start: {
+    // The preprocessor has already validated the syntax of the #embed
+    // directive and has produced this series of tokens, so we do not need to
+    // check for syntactic correctness. The form will be:
+    //    type-name string-literal , string-literal
+    //
+    // where the type-name is the type of the elements to embed, the first
+    // string-literal is the file name the user passed to the directive, and
+    // the second string-literal is base64 encoded data from that file.
+    SourceLocation StartLoc = ConsumeAnnotationToken();
+    SourceRange DataTyExprSourceRange;
+    TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+    ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
+    // There is a comma separating the string literals to prevent them from
+    // combining into a single string literal.
+    ExpectAndConsume(tok::comma);
+    ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
+
+    const ASTContext &Context = Actions.getASTContext();
+    QualType DataTy = DataTyExpr.get().get().getCanonicalType();
+    size_t TargetWidth = Context.getTypeSize(DataTy);
+    if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+        DataTy.getUnqualifiedType() != Context.CharTy) {
+      // TODO: check if is exactly the same as unsigned char
+      Diag(DataTyExprSourceRange.getBegin(),
+           diag::err_builtin_pp_embed_invalid_argument)
+          << "only 'char' and 'unsigned char' are supported";
+      Res = ExprError();
+    }
+    if ((TargetWidth % CHAR_BIT) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+           diag::err_builtin_pp_embed_invalid_argument)
+          << "width of element type is not a multiple of host platform's "
+             "CHAR_BIT!";
+      Res = ExprError();
+    }
+
+    StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+    std::vector<char> BinaryData;
+    StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+    if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
+      Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+          << 0
+          << "'__builtin_pp_embed' with valid base64 encoding that is an "
+             "ordinary \"...\" string";
+    }
+    const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+      Diag(Base64Str->getExprLoc(), diag::err_builtin_pp_embed_invalid_argument)
+          << "expected a valid base64 encoded string";
+    };
+    llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+    llvm::handleAllErrors(std::move(Err), OnDecodeError);
+    if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+           diag::err_builtin_pp_embed_invalid_argument)
+          << "size of data does not split evently into the number of bytes "
+             "requested";
+      Res = ExprError();
+    }
+
+    // Now we expect the end annotation token.
+    assert(Tok.is(tok::annot_embed_end));
+    SourceLocation EndLoc = ConsumeAnnotationToken();
+    if (!Res.isInvalid()) {
+      Res = Actions.ActOnPPEmbedExpr(
+          StartLoc, Base64ArgExpr.get()->getExprLoc(), EndLoc, FilenameLiteral,
+          DataTy, std::move(BinaryData));
+    }
+  } break;
+
   case tok::kw___super:
   case tok::kw_decltype:
     // Annotate the token and tail recurse.
@@ -1352,7 +1421,6 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
   case tok::kw___builtin_FUNCSIG:
   case tok::kw___builtin_LINE:
   case tok::kw___builtin_source_location:
-  case tok::kw___builtin_pp_embed:
     if (NotPrimaryExpression)
       *NotPrimaryExpression = true;
     // This parses the complete suffix; we can return early.
@@ -2608,7 +2676,6 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
-/// [CLANG] '__builtin_pp_embed' '(' 'type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [OCL]   '__builtin_astype' '(' assignment-expression ',' type-name ')'
 ///
@@ -2877,97 +2944,6 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen());
     break;
   }
-  case tok::kw___builtin_pp_embed: {
-    // __builtin_pp_embed( type-name , string-literal , string-literal )
-    SourceRange DataTyExprSourceRange;
-    TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
-
-    if (DataTyExpr.isInvalid()) {
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return ExprError();
-    }
-
-    if (ExpectAndConsume(tok::comma)) {
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return ExprError();
-    }
-
-    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
-      Diag(Tok, diag::err_expected_string_literal)
-          << /*as argument*/ 5 << /*second argument*/ 2;
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return ExprError();
-    }
-    ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
-
-    if (FilenameArgExpr.isInvalid() || ExpectAndConsume(tok::comma)) {
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return ExprError();
-    }
-
-    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
-      Diag(Tok, diag::err_expected_string_literal)
-          << /*as argument*/ 5 << /*third argument*/ 3;
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return ExprError();
-    }
-    ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
-
-    if (Base64ArgExpr.isInvalid() || Tok.isNot(tok::r_paren)) {
-      Diag(Tok, diag::err_expected) << tok::r_paren;
-      return ExprError();
-    }
-
-    const ASTContext &Context = Actions.getASTContext();
-    QualType DataTy = DataTyExpr.get().get().getCanonicalType();
-    size_t TargetWidth = Context.getTypeSize(DataTy);
-    if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
-        DataTy.getUnqualifiedType() != Context.CharTy) {
-      // TODO: check if is exactly the same as unsigned char
-      Diag(DataTyExprSourceRange.getBegin(),
-            diag::err_builtin_pp_embed_invalid_argument)
-          << "only 'char' and 'unsigned char' are supported";
-      Res = ExprError();
-    }
-    if ((TargetWidth % CHAR_BIT) != 0) {
-      Diag(DataTyExprSourceRange.getBegin(),
-            diag::err_builtin_pp_embed_invalid_argument)
-          << "width of element type is not a multiple of host platform's "
-              "CHAR_BIT!";
-      Res = ExprError();
-    }
-
-    StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
-    std::vector<char> BinaryData;
-    StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
-    if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
-      Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
-          << 0
-          << "'__builtin_pp_embed' with valid base64 encoding that is an "
-              "ordinary \"...\" string";
-    }
-    const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
-      Diag(Base64Str->getExprLoc(),
-            diag::err_builtin_pp_embed_invalid_argument)
-          << "expected a valid base64 encoded string";
-    };
-    llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
-    llvm::handleAllErrors(std::move(Err), OnDecodeError);
-    if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
-      Diag(DataTyExprSourceRange.getBegin(),
-            diag::err_builtin_pp_embed_invalid_argument)
-          << "size of data does not split evently into the number of bytes "
-              "requested";
-      Res = ExprError();
-    }
-
-    if (!Res.isInvalid()) {
-      Res = Actions.ActOnPPEmbedExpr(
-          StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(),
-          FilenameLiteral, DataTy, std::move(BinaryData));
-    }
-    break;
-  }
   }
 
   if (Res.isInvalid())
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6939252078674db..70dc6ad26abec27 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13371,7 +13371,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
         // Expand the list in-place immediately, let the natural work take hold
         Init = ExpandSinglePPEmbedExpr(PPEmbed);
       } else {
-        // `__builtin_pp_embed( ... )` only produces 2 or more values.
+        // #embed only produces 2 or more values.
+        // FIXME: still uses the old builtin name.
         Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
             << "'__builtin_pp_embed'";
         RealDecl->setInvalidDecl();
diff --git a/clang/test/Parser/embed_builtin.cpp b/clang/test/Parser/embed_builtin.cpp
deleted file mode 100644
index 487c11c393ad0ee..000000000000000
--- a/clang/test/Parser/embed_builtin.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-
-void parsing_diags() {
-  __builtin_pp_embed;                   // expected-error {{expected '(' after '__builtin_pp_embed'}}
-  __builtin_pp_embed(;                  // expected-error {{expected a type}}
-  __builtin_pp_embed();                 // expected-error {{expected a type}}
-  __builtin_pp_embed(12);               // expected-error {{expected a type}}
-  __builtin_pp_embed(int);              // expected-error {{expected ','}}
-  __builtin_pp_embed(int, 12);          // expected-error {{expected string literal as the 2nd argument}}
-  __builtin_pp_embed(int, "", 12);      // expected-error {{expected string literal as the 3rd argument}}
-  __builtin_pp_embed(int, "", "", 12);  // expected-error {{expected ')'}}
-  (void)__builtin_pp_embed(char, L"", "");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
-  (void)__builtin_pp_embed(char, "", L"");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
-}
diff --git a/clang/test/Preprocessor/embed_builtin.cpp b/clang/test/Preprocessor/embed_builtin.cpp
deleted file mode 100644
index d2547fa0c3f668d..000000000000000
--- a/clang/test/Preprocessor/embed_builtin.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-// expected-no-diagnostics
-
-#if !__has_builtin(__builtin_pp_embed)
-#error "Don't have __builtin_pp_embed?"
-#endif
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 1706ac457e9224c..314cd823e577044 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -7,6 +7,4 @@ const char data[] = {
 #embed <media/art.txt>
 };
 
-// CHECK: # 1 "<built-in:embed:1>" 1
-// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
-// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
+// CHECK: "{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg=="

>From c7e1304bc5db1e09bae4d2d70c0cdd8bfef768ab Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 13 Nov 2023 13:24:18 -0500
Subject: [PATCH 29/50] Formatting changes; NFC

---
 clang/include/clang/Lex/Preprocessor.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 8db920ad2dc6610..1d7d2a1e62c2356 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2743,13 +2743,13 @@ class Preprocessor {
                                  const LexEmbedParametersResult &Params,
                                  StringRef BinaryContents,
                                  const size_t TargetCharWidth);
-  void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc,
-                                   const Token &FilenameTok,
-                                   StringRef ResolvedFilename,
-                                   StringRef SearchPath, StringRef RelativePath,
-                                   const LexEmbedParametersResult &Params,
-                                   StringRef BinaryContents,
-                                   const size_t TargetCharWidth);
+  void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
+                                const Token &FilenameTok,
+                                StringRef ResolvedFilename,
+                                StringRef SearchPath, StringRef RelativePath,
+                                const LexEmbedParametersResult &Params,
+                                StringRef BinaryContents,
+                                const size_t TargetCharWidth);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,

>From 8c1a8fb3373f33633197a832f33f3f6fc93becd1 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 08:22:37 -0500
Subject: [PATCH 30/50] Change how we handle prefix and suffix tokens

Previously, we would scan the prefix and suffix tokens to see if they
were a simple sequence of integer literals and commas, and if so, we
would encode their data as part of the binary contents for the embed
expression. If they were not a simple sequence, we would fall back to
the "naive" implementation.

This removes the naive implementation entirely; that would produce six
tokens for every byte in the embedded file:
  ( unsigned char ) <value> ,
which is not going to have acceptable memory overhead for files over a
particular size.

Now, we stream the prefix tokens first, then the embed expression
tokens, then the suffix tokens. This way, the parser always sees the
correct prefix and suffix and non-idiomatic uses won't suffer an
extreme compile-time performance penalty.
---
 clang/include/clang/Lex/Preprocessor.h        |  16 +-
 clang/lib/Lex/PPDirectives.cpp                | 339 ++----------------
 .../Preprocessor/embed_parameter_prefix.c     |  13 +-
 .../Preprocessor/embed_parameter_suffix.c     |  14 +-
 4 files changed, 73 insertions(+), 309 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 1d7d2a1e62c2356..0222e504a156c0c 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1746,6 +1746,17 @@ class Preprocessor {
     SourceLocation EndLoc;
     int UnrecognizedParams;
     bool Successful;
+
+    size_t PrefixTokenCount() const {
+      if (MaybePrefixParam)
+        return MaybePrefixParam->Tokens.size();
+      return 0;
+    }
+    size_t SuffixTokenCount() const {
+      if (MaybeSuffixParam)
+        return MaybeSuffixParam->Tokens.size();
+      return 0;
+    }
   };
 
   LexEmbedParametersResult LexEmbedParameters(Token &Current,
@@ -2738,11 +2749,6 @@ class Preprocessor {
   // Binary data inclusion
   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                             const FileEntry *LookupFromFile = nullptr);
-  void HandleEmbedDirectiveNaive(SourceLocation HashLoc,
-                                 SourceLocation FilenameTok,
-                                 const LexEmbedParametersResult &Params,
-                                 StringRef BinaryContents,
-                                 const size_t TargetCharWidth);
   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
                                 const Token &FilenameTok,
                                 StringRef ResolvedFilename,
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 78269021484824d..9f0b82af925b95d 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3815,261 +3815,6 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
   return Result;
 }
 
-// This array must survive for an extended period of time
-inline constexpr const char *IntegerLiterals[] = {
-    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "10",
-    "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",  "20",  "21",
-    "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",  "30",  "31",  "32",
-    "33",  "34",  "35",  "36",  "37",  "38",  "39",  "40",  "41",  "42",  "43",
-    "44",  "45",  "46",  "47",  "48",  "49",  "50",  "51",  "52",  "53",  "54",
-    "55",  "56",  "57",  "58",  "59",  "60",  "61",  "62",  "63",  "64",  "65",
-    "66",  "67",  "68",  "69",  "70",  "71",  "72",  "73",  "74",  "75",  "76",
-    "77",  "78",  "79",  "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",
-    "88",  "89",  "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",
-    "99",  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
-    "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120",
-    "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131",
-    "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142",
-    "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153",
-    "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164",
-    "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175",
-    "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186",
-    "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197",
-    "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208",
-    "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
-    "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230",
-    "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241",
-    "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
-    "253", "254", "255"};
-
-static size_t
-ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params,
-                        StringRef TypeName, StringRef BinaryContents,
-                        SmallVectorImpl<char> &TokSpellingBuffer) {
-  size_t ReserveSize = 0;
-  if (BinaryContents.empty()) {
-    if (Params.MaybeIfEmptyParam) {
-      for (const auto &Tok : Params.MaybeIfEmptyParam->Tokens) {
-        const size_t TokLen = Tok.getLength();
-        if (TokLen > TokSpellingBuffer.size()) {
-          TokSpellingBuffer.resize(TokLen);
-        }
-        ReserveSize += TokLen;
-      }
-    }
-  } else {
-    if (Params.MaybePrefixParam) {
-      for (const auto &Tok : Params.MaybePrefixParam->Tokens) {
-        const size_t TokLen = Tok.getLength();
-        if (TokLen > TokSpellingBuffer.size()) {
-          TokSpellingBuffer.resize(TokLen);
-        }
-        ReserveSize += TokLen;
-      }
-    }
-    for (const auto &Byte : BinaryContents) {
-      ReserveSize += 3 + TypeName.size(); // ((type-name)
-      if (Byte > 99) {
-        ReserveSize += 3; // ###
-      } else if (Byte > 9) {
-        ReserveSize += 2; // ##
-      } else {
-        ReserveSize += 1; // #
-      }
-      ReserveSize += 2; // ),
-    }
-    if (Params.MaybePrefixParam) {
-      for (const auto &Tok : Params.MaybePrefixParam->Tokens) {
-        const size_t TokLen = Tok.getLength();
-        if (TokLen > TokSpellingBuffer.size()) {
-          TokSpellingBuffer.resize(TokLen);
-        }
-        ReserveSize += TokLen;
-      }
-    }
-  }
-  return ReserveSize;
-}
-
-void Preprocessor::HandleEmbedDirectiveNaive(
-    SourceLocation HashLoc, SourceLocation FilenameLoc,
-    const LexEmbedParametersResult &Params, StringRef BinaryContents,
-    const size_t TargetCharWidth) {
-  // Load up a new embed buffer for this file and set of parameters in
-  // particular.
-  EmbedBuffers.push_back("");
-  size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string &TargetEmbedBuffer = EmbedBuffers.back();
-  const size_t TotalSize = BinaryContents.size();
-  // In the future, this might change/improve.
-  const StringRef TypeName = "unsigned char";
-
-  SmallVector<char, 32> TokSpellingBuffer(32, 0);
-  const size_t ReserveSize = ComputeNaiveReserveSize(
-      Params, TypeName, BinaryContents, TokSpellingBuffer);
-  TargetEmbedBuffer.reserve(ReserveSize);
-
-  // Generate the look-alike source file
-  if (BinaryContents.empty()) {
-    if (Params.MaybeIfEmptyParam) {
-      const PPEmbedParameterIfEmpty &EmptyParam = *Params.MaybeIfEmptyParam;
-      for (const auto &Tok : EmptyParam.Tokens) {
-        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
-        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
-      }
-    }
-  } else {
-    if (Params.MaybePrefixParam) {
-      const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
-      for (const auto &Tok : PrefixParam.Tokens) {
-        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
-        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
-      }
-    }
-    for (size_t I = 0; I < TotalSize; ++I) {
-      unsigned char ByteValue = BinaryContents[I];
-      StringRef ByteRepresentation = IntegerLiterals[ByteValue];
-      TargetEmbedBuffer.append(2, '(');
-      TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
-      TargetEmbedBuffer.append(1, ')');
-      TargetEmbedBuffer.append(ByteRepresentation.data(),
-                               ByteRepresentation.size());
-      TargetEmbedBuffer.append(1, ')');
-      bool AtEndOfContents = I == (TotalSize - 1);
-      if (!AtEndOfContents) {
-        TargetEmbedBuffer.append(1, ',');
-      }
-    }
-    if (Params.MaybeSuffixParam) {
-      const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
-      for (const auto &Tok : SuffixParam.Tokens) {
-        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
-        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
-      }
-    }
-  }
-
-  // Create faux-file and its ID, backed by a memory buffer.
-  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(
-          TargetEmbedBuffer,
-          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
-  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
-  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
-  assert(EmbedBufferFID.isValid() &&
-         "Could not create FileID for #embed directive?");
-  // Start parsing the look-alike source file for the embed directive and
-  // pretend everything is normal
-  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
-  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
-}
-
-static bool TokenListIsCharacterArray(Preprocessor &PP,
-                                      const size_t TargetCharWidth,
-                                      bool IsPrefix,
-                                      const SmallVectorImpl<Token> &Tokens,
-                                      llvm::SmallVectorImpl<char> &Output) {
-  const bool IsSuffix = !IsPrefix;
-  size_t MaxValue =
-      static_cast<size_t>(std::pow((size_t)2, TargetCharWidth)) - 1u;
-  size_t TokenIndex = 0;
-  // if it's a suffix, we are expecting a comma first
-  // if it's a prefix, we are expecting a numeric literal first
-  bool ExpectingNumericLiteral = IsPrefix;
-  const size_t TokensSize = Tokens.size();
-  if (Tokens.empty()) {
-    return true;
-  }
-  for (; TokenIndex < TokensSize;
-       (void)++TokenIndex, ExpectingNumericLiteral = !ExpectingNumericLiteral) {
-    const Token &Tok = Tokens[TokenIndex];
-    // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the
-    // literals, since the Spec technically decrees each element is of type
-    // `unsigned char` (unless we have a potential future extension for
-    // `clang::type(meow)` as an embed parameter
-    if (ExpectingNumericLiteral) {
-      if (Tok.isNot(tok::numeric_constant)) {
-        return false;
-      }
-      uint64_t Value = {};
-      Token ParsingTok = Tok;
-      if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false)) {
-        // numeric literal is a floating point literal or a UDL; too complex for
-        // us
-        return false;
-      }
-      if (Value > MaxValue || Value > static_cast<uint64_t>(0xFF)) {
-        // number is too large
-        return false;
-      }
-      Output.push_back((char)Value);
-    } else {
-      if (Tok.isNot(tok::comma)) {
-        return false;
-      }
-    }
-  }
-  const bool EndedOnNumber = !ExpectingNumericLiteral;
-  if (IsPrefix && EndedOnNumber) {
-    // we ended on a number: this is a failure for prefix!
-    return false;
-  }
-  const bool EndedOnComma = ExpectingNumericLiteral;
-  if (IsSuffix && EndedOnComma) {
-    // we ended on a comma: this is a failure for suffix!
-    return false;
-  }
-  // if all tokens have been consumed by the above process, then we have
-  // succeeded.
-  return TokenIndex == TokensSize;
-}
-
-static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
-                               StringRef Bytes2, std::string &OutputBuffer) {
-  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                              "abcdefghijklmnopqrstuvwxyz"
-                              "0123456789+/";
-  const size_t TotalSize = Bytes0.size() + Bytes1.size() + Bytes2.size();
-  const size_t Bytes0Size = Bytes0.size();
-  const size_t Bytes01Size = Bytes0.size() + Bytes1.size();
-  const size_t IndexOffset = OutputBuffer.size();
-  OutputBuffer.resize(OutputBuffer.size() + (((TotalSize + 2) / 3) * 4));
-  auto IndexInto = [&](size_t i) -> unsigned char {
-    if (i >= Bytes0Size) {
-      if (i >= Bytes01Size) {
-        return Bytes2[i - Bytes01Size];
-      }
-      return Bytes1[i - Bytes0Size];
-    }
-    return Bytes0[i];
-  };
-
-  size_t i = 0, j = 0;
-  for (size_t n = TotalSize / 3 * 3; i < n; i += 3, j += 4) {
-    uint32_t x = ((unsigned char)IndexInto(i) << 16) |
-                 ((unsigned char)IndexInto(i + 1) << 8) |
-                 (unsigned char)IndexInto(i + 2);
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
-    OutputBuffer[IndexOffset + j + 3] = Table[x & 63];
-  }
-  if (i + 1 == TotalSize) {
-    uint32_t x = ((unsigned char)IndexInto(i) << 16);
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = '=';
-    OutputBuffer[IndexOffset + j + 3] = '=';
-  } else if (i + 2 == TotalSize) {
-    uint32_t x = ((unsigned char)IndexInto(i) << 16) |
-                 ((unsigned char)IndexInto(i + 1) << 8);
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
-    OutputBuffer[IndexOffset + j + 3] = '=';
-  }
-}
-
 void Preprocessor::HandleEmbedDirectiveImpl(
     SourceLocation HashLoc, const Token &FilenameTok,
     StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
@@ -4077,9 +3822,11 @@ void Preprocessor::HandleEmbedDirectiveImpl(
     const size_t TargetCharWidth) {
   // Pass off the annotation token stream. The parser expects:
   //   if_empty-tokens or
-  //   embed-annotation-start
-  //     type-name string-literal , string-literal
-  //   embed-annotation-stop
+  //     prefix-tokens (if any)
+  //     embed-annotation-start
+  //       type-name string-literal , string-literal
+  //     embed-annotation-stop
+  //     suffix-tokens (if any)
   // where the type-name is the type used for each element to embed, the first
   // string-literal is the resolved file name of the file we loaded contents
   // from, and the second string-literal is the base64 encoded data we loaded
@@ -4101,36 +3848,6 @@ void Preprocessor::HandleEmbedDirectiveImpl(
     return;
   }
 
-  // FIXME: this is not correct; the standard allows *arbitrary* tokens in the
-  // prefix and suffix, but this only accounts for numeric literals and commas,
-  // but nothing else.
-  SmallVector<char, 2> BinaryPrefix, BinarySuffix;
-  if (Params.MaybePrefixParam) {
-    // If we ahve a prefix, validate that it's a good fit for direct data
-    // embedded (and prepare to prepend it)
-    const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
-    if (!TokenListIsCharacterArray(*this, TargetCharWidth, true,
-                                   PrefixParam.Tokens, BinaryPrefix)) {
-      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
-                                BinaryContents, TargetCharWidth);
-      return;
-    }
-  }
-  if (Params.MaybeSuffixParam) {
-    // If we have a prefix, validate that it's a good fit for direct data
-    // embedding (and prepare to append it)
-    const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
-    if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
-                                   SuffixParam.Tokens, BinarySuffix)) {
-      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
-                                BinaryContents, TargetCharWidth);
-      return;
-    }
-  }
-
-  // Now emit the tokens for the embedded content itself.
-  std::string EncodedContents = llvm::encodeBase64(
-      (Twine(BinaryPrefix) + BinaryContents + Twine(BinarySuffix)).str());
   auto SetAnnotTok = [](Token &Tok, tok::TokenKind Kind, SourceLocation Loc) {
     Tok.startToken();
     Tok.setKind(Kind);
@@ -4141,29 +3858,47 @@ void Preprocessor::HandleEmbedDirectiveImpl(
     Tok.setKind(tok::string_literal);
     CreateString(("\"" + Contents + "\"").str(), Tok, Loc, Loc);
   };
-  constexpr size_t TotalNumToks = 7;
+
+  size_t NumPrefixToks = Params.PrefixTokenCount(),
+         NumSuffixToks = Params.SuffixTokenCount();
+  size_t TotalNumToks = 7 + NumPrefixToks + NumSuffixToks;
+  size_t CurIdx = 0;
   auto Toks = std::make_unique<Token[]>(TotalNumToks);
 
-  SetAnnotTok(Toks[0], tok::annot_embed_start, HashLoc);
+  // Add the prefix tokens, if any.
+  if (Params.MaybePrefixParam) {
+    llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
+    CurIdx += NumPrefixToks;
+  }
 
-  Toks[1].startToken();
-  Toks[1].setLocation(HashLoc);
-  Toks[1].setKind(tok::kw_unsigned);
+  // Now annotate the embed itself.
+  SetAnnotTok(Toks[CurIdx++], tok::annot_embed_start, HashLoc);
 
-  Toks[2].startToken();
-  Toks[2].setLocation(HashLoc);
-  Toks[2].setKind(tok::kw_char);
+  Toks[CurIdx].startToken();
+  Toks[CurIdx].setLocation(HashLoc);
+  Toks[CurIdx++].setKind(tok::kw_unsigned);
 
-  SetStrTok(Toks[3], ResolvedFilename, HashLoc);
+  Toks[CurIdx].startToken();
+  Toks[CurIdx].setLocation(HashLoc);
+  Toks[CurIdx++].setKind(tok::kw_char);
 
-  Toks[4].startToken();
-  Toks[4].setLocation(HashLoc);
-  Toks[4].setKind(tok::comma);
+  SetStrTok(Toks[CurIdx++], ResolvedFilename, HashLoc);
 
-  SetStrTok(Toks[5], EncodedContents, HashLoc);
+  Toks[CurIdx].startToken();
+  Toks[CurIdx].setLocation(HashLoc);
+  Toks[CurIdx++].setKind(tok::comma);
 
-  SetAnnotTok(Toks[6], tok::annot_embed_end, HashLoc);
+  SetStrTok(Toks[CurIdx++], llvm::encodeBase64(BinaryContents), HashLoc);
+
+  SetAnnotTok(Toks[CurIdx++], tok::annot_embed_end, HashLoc);
+
+  // Now add the suffix tokens, if any.
+  if (Params.MaybeSuffixParam) {
+    llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
+    CurIdx += NumSuffixToks;
+  }
 
+  assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
   EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
 }
 
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
index 5182a2b874d3991..e806d6d4c877296 100644
--- a/clang/test/Preprocessor/embed_parameter_prefix.c
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <single_byte.txt> prefix('\xA', )
@@ -12,4 +13,14 @@ _Static_assert('\xA' == data[0], "");
 _Static_assert('b' == data[1], "");
 _Static_assert(sizeof(empty_data) == 1, "");
 _Static_assert(1 == empty_data[0], "");
-// expected-no-diagnostics
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> prefix( .x = 100, .y = 10, )
+};
+_Static_assert(s.x == 100, "");
+_Static_assert(s.y == 10, "");
+_Static_assert(s.z == 'b', "");
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
index 11c3f2bbbfb2bb6..59c21d71750cdea 100644
--- a/clang/test/Preprocessor/embed_parameter_suffix.c
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <single_byte.txt> suffix(, '\xA')
@@ -12,4 +13,15 @@ _Static_assert('b' == data[0], "");
 _Static_assert('\xA' == data[1], "");
 _Static_assert(sizeof(empty_data) == 1, "");
 _Static_assert(1 == empty_data[0], "");
-// expected-no-diagnostics
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> suffix( , .y = 100, .z = 10 )
+};
+
+_Static_assert(s.x == 'b', "");
+_Static_assert(s.y == 100, "");
+_Static_assert(s.z == 10, "");

>From 14d08b621bbcd53a9de7a826ddeb690579ec1c25 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 08:33:25 -0500
Subject: [PATCH 31/50] Add some FIXME comments about AST fidelity; NFC

---
 clang/lib/Lex/PPDirectives.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9f0b82af925b95d..b435af9a1e312fc 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3958,6 +3958,9 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   }
   std::optional<int64_t> MaybeSignedLimit{};
   if (Params.MaybeLimitParam) {
+    // FIXME: just like with the clang::offset() and if_empty() parameters,
+    // this loses source fidelity in the AST; it has no idea there was a limit
+    // involved.
     MaybeSignedLimit = static_cast<int64_t>(Params.MaybeLimitParam->Limit);
   }
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
@@ -3971,6 +3974,9 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   }
   StringRef BinaryContents = MaybeFile.get()->getBuffer();
   if (Params.MaybeOffsetParam) {
+    // FIXME: just like with the limit() and if_empty() parameters, this loses
+    // source fidelity in the AST; it has no idea that there was an offset
+    // involved.
     // offsets all the way to the end of the file make for an empty file.
     const size_t &OffsetParam = Params.MaybeOffsetParam->Offset;
     BinaryContents = BinaryContents.substr(OffsetParam);

>From 16cfd3171fb67919c204cba387b4d120f66b9a79 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 08:45:01 -0500
Subject: [PATCH 32/50] Backing out an unneeded change; NFC

This was added in an earlier refactoring to support diagnosing issues
with calls to __builtin_pp_embed; it is no longer needed now that we've
removed the builtin.
---
 clang/include/clang/Basic/DiagnosticCommonKinds.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 265bccbe183ef81..e66a377be9ae2bf 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -57,7 +57,7 @@ def err_expected_string_literal : Error<"expected string literal "
           "for optional message in 'availability' attribute|"
           "for %select{language name|source container name|USR}1 in "
           "'external_source_symbol' attribute|"
-          "as argument of '%1' attribute|as the %ordinal1 argument}0">;
+          "as argument of '%1' attribute}0">;
 
 def err_builtin_pp_embed_invalid_argument : Error<
   "invalid argument to '__builtin_pp_embed': %0">;

>From 528077e480a11261097f91faa6e0b2d65970f61c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 09:51:04 -0500
Subject: [PATCH 33/50] Correct diagnostic behavior for ext and compat warnings

The previous diagnostic wording was incorrect and untested; this
updates the wording, changes the groups the diagnostics are in, adds an
explicit test for the diagnostics, and corrects test RUN lines to
specify the standards mode (to disable diagnostics we don't intend to
test).
---
 .../include/clang/Basic/DiagnosticLexKinds.td |  26 ++--
 clang/lib/Lex/PPDirectives.cpp                |  11 +-
 clang/lib/Lex/PPMacroExpansion.cpp            |  12 +-
 clang/test/Preprocessor/embed___has_embed.c   |   4 +-
 .../embed___has_embed_supported.c             |   2 +-
 clang/test/Preprocessor/embed_art.c           | 120 +++++++++---------
 .../Preprocessor/embed_ext_compat_diags.c     |  18 +++
 .../test/Preprocessor/embed_file_not_found.c  |   2 +-
 clang/test/Preprocessor/embed_init.c          |  12 +-
 .../Preprocessor/embed_parameter_if_empty.c   |  18 +--
 .../test/Preprocessor/embed_parameter_limit.c |  16 +--
 .../Preprocessor/embed_parameter_offset.c     |  16 +--
 .../Preprocessor/embed_parameter_prefix.c     |  18 +--
 .../Preprocessor/embed_parameter_suffix.c     |  18 +--
 .../embed_parameter_unrecognized.c            |   2 +-
 clang/test/Preprocessor/embed_path_chevron.c  |   8 +-
 clang/test/Preprocessor/embed_path_quote.c    |   8 +-
 clang/test/Preprocessor/embed_weird.cpp       |   4 +-
 18 files changed, 163 insertions(+), 152 deletions(-)
 create mode 100644 clang/test/Preprocessor/embed_ext_compat_diags.c

diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index eaf04700a54d40b..97d3856b4fc92e9 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -422,22 +422,16 @@ def warn_cxx23_compat_warning_directive : Warning<
 def warn_c23_compat_warning_directive : Warning<
   "#warning is incompatible with C standards before C23">,
   InGroup<CPre23Compat>, DefaultIgnore;
-def warn_c23_pp_embed : Warning<
-  "'__has_embed' is a C23 extension">,
-  InGroup<CPre23Compat>,
-  DefaultIgnore;
-def warn_c23_pp_has_embed : Warning<
-  "'__has_embed' is a C23 extension">,
-  InGroup<CPre23Compat>,
-  DefaultIgnore;
-def warn_cxx26_pp_embed : Warning<
-  "'__has_embed' is a C++26 extension">,
-  InGroup<CXXPre26Compat>,
-  DefaultIgnore;
-def warn_cxx26_pp_has_embed : Warning<
-  "'__has_embed' is a C++26 extension">,
-  InGroup<CXXPre26Compat>,
-  DefaultIgnore;
+def ext_pp_embed_directive : ExtWarn<
+  "'#embed' is a %select{C23|Clang}0 extension">, InGroup<C23>;
+def warn_compat_pp_embed_directive : Warning<
+  "'#embed' is incompatible with C standards before C23">,
+  InGroup<CPre23Compat>, DefaultIgnore;
+def ext_pp_has_embed : ExtWarn<
+  "'__has_embed' is a %select{C23|Clang}0 extension">, InGroup<C23>;
+def warn_compat_pp_has_embed : Warning<
+  "'__has_embed' is incompatible with C standards before C23">,
+  InGroup<CPre23Compat>, DefaultIgnore;
 
 def ext_pp_extra_tokens_at_eol : ExtWarn<
   "extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index b435af9a1e312fc..19f49db6c6c61d3 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3904,11 +3904,12 @@ void Preprocessor::HandleEmbedDirectiveImpl(
 
 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                                         const FileEntry *LookupFromFile) {
-  if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
-                                          : diag::warn_c23_pp_embed);
-    Diag(EmbedTok, EitherDiag);
-  }
+  // Give the usual extension/compatibility warnings.
+  if (LangOpts.C23)
+    Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
+  else
+    Diag(EmbedTok, diag::ext_pp_embed_directive)
+        << (LangOpts.CPlusPlus ? /*Clang*/1 : /*C23*/0);
 
   // Parse the filename header
   Token FilenameTok;
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 3ff3055a17c4e90..79e521649c747fe 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1273,12 +1273,12 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
 /// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
 /// Returns a filled optional with the value if successful; otherwise, empty.
 EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
-  // pedwarn for not being on C23
-  if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
-                                          : diag::warn_c23_pp_has_embed);
-    Diag(Tok, EitherDiag);
-  }
+  // Give the usual extension/compatibility warnings.
+  if (LangOpts.C23)
+    Diag(Tok, diag::warn_compat_pp_has_embed);
+  else
+    Diag(Tok, diag::ext_pp_has_embed)
+        << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
 
   // Save the location of the current token.  If a '(' is later found, use
   // that location.  If not, use the end of this location instead.
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
index 80980e753614a5d..d63a06372c6d1e4 100644
--- a/clang/test/Preprocessor/embed___has_embed.c
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify
+// RUN: %clang_cc1 -std=c23 %s -E -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
 
 #if !__has_embed(__FILE__)
 #error 1
@@ -31,4 +32,3 @@
 #elif !__has_embed(<media/art.txt> if_empty(meow))
 #error 14
 #endif
-// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
index fe0edb00e609837..f972762e256770e 100644
--- a/clang/test/Preprocessor/embed___has_embed_supported.c
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -std=c23 %s -E -verify
 
 #if !__has_embed(__FILE__)
 #error 1
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
index 1639fb7af7f07b0..4b30bf41ab54f72 100644
--- a/clang/test/Preprocessor/embed_art.c
+++ b/clang/test/Preprocessor/embed_art.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <media/art.txt>
@@ -15,25 +15,25 @@ const char data4[] = {
 #embed <media/art.txt> suffix(,)
 0
 };
-_Static_assert(sizeof(data) == 274, "");
-_Static_assert(' ' == data[0], "");
-_Static_assert('_' == data[11], "");
-_Static_assert('\n' == data[273], "");
-_Static_assert(sizeof(data2) == 275, "");
-_Static_assert(' ' == data2[0], "");
-_Static_assert('_' == data2[11], "");
-_Static_assert('\n' == data2[273], "");
-_Static_assert('\0' == data2[274], "");
-_Static_assert(sizeof(data3) == 275, "");
-_Static_assert(' ' == data3[0], "");
-_Static_assert('_' == data3[11], "");
-_Static_assert('\n' == data3[273], "");
-_Static_assert('\0' == data3[274], "");
-_Static_assert(sizeof(data4) == 275, "");
-_Static_assert(' ' == data4[0], "");
-_Static_assert('_' == data4[11], "");
-_Static_assert('\n' == data4[273], "");
-_Static_assert('\0' == data4[274], "");
+static_assert(sizeof(data) == 274);
+static_assert(' ' == data[0]);
+static_assert('_' == data[11]);
+static_assert('\n' == data[273]);
+static_assert(sizeof(data2) == 275);
+static_assert(' ' == data2[0]);
+static_assert('_' == data2[11]);
+static_assert('\n' == data2[273]);
+static_assert('\0' == data2[274]);
+static_assert(sizeof(data3) == 275);
+static_assert(' ' == data3[0]);
+static_assert('_' == data3[11]);
+static_assert('\n' == data3[273]);
+static_assert('\0' == data3[274]);
+static_assert(sizeof(data4) == 275);
+static_assert(' ' == data4[0]);
+static_assert('_' == data4[11]);
+static_assert('\n' == data4[273]);
+static_assert('\0' == data4[274]);
 
 const signed char data5[] = {
 #embed <media/art.txt>
@@ -49,25 +49,25 @@ const signed char data8[] = {
 #embed <media/art.txt> suffix(,)
 0
 };
-_Static_assert(sizeof(data5) == 274, "");
-_Static_assert(' ' == data5[0], "");
-_Static_assert('_' == data5[11], "");
-_Static_assert('\n' == data5[273], "");
-_Static_assert(sizeof(data6) == 275, "");
-_Static_assert(' ' == data6[0], "");
-_Static_assert('_' == data6[11], "");
-_Static_assert('\n' == data6[273], "");
-_Static_assert('\0' == data6[274], "");
-_Static_assert(sizeof(data7) == 275, "");
-_Static_assert(' ' == data7[0], "");
-_Static_assert('_' == data7[11], "");
-_Static_assert('\n' == data7[273], "");
-_Static_assert('\0' == data7[274], "");
-_Static_assert(sizeof(data8) == 275, "");
-_Static_assert(' ' == data8[0], "");
-_Static_assert('_' == data8[11], "");
-_Static_assert('\n' == data8[273], "");
-_Static_assert('\0' == data8[274], "");
+static_assert(sizeof(data5) == 274);
+static_assert(' ' == data5[0]);
+static_assert('_' == data5[11]);
+static_assert('\n' == data5[273]);
+static_assert(sizeof(data6) == 275);
+static_assert(' ' == data6[0]);
+static_assert('_' == data6[11]);
+static_assert('\n' == data6[273]);
+static_assert('\0' == data6[274]);
+static_assert(sizeof(data7) == 275);
+static_assert(' ' == data7[0]);
+static_assert('_' == data7[11]);
+static_assert('\n' == data7[273]);
+static_assert('\0' == data7[274]);
+static_assert(sizeof(data8) == 275);
+static_assert(' ' == data8[0]);
+static_assert('_' == data8[11]);
+static_assert('\n' == data8[273]);
+static_assert('\0' == data8[274]);
 
 const unsigned char data9[] = {
 #embed <media/art.txt>
@@ -83,24 +83,22 @@ const unsigned char data12[] = {
 0
 #embed <media/art.txt> prefix(,)
 };
-_Static_assert(sizeof(data9) == 274, "");
-_Static_assert(' ' == data9[0], "");
-_Static_assert('_' == data9[11], "");
-_Static_assert('\n' == data9[273], "");
-_Static_assert(sizeof(data10) == 275, "");
-_Static_assert(' ' == data10[1], "");
-_Static_assert('_' == data10[12], "");
-_Static_assert('\n' == data10[274], "");
-_Static_assert('\0' == data10[0], "");
-_Static_assert(sizeof(data11) == 275, "");
-_Static_assert(' ' == data11[1], "");
-_Static_assert('_' == data11[12], "");
-_Static_assert('\n' == data11[274], "");
-_Static_assert('\0' == data11[0], "");
-_Static_assert(sizeof(data12) == 275, "");
-_Static_assert(' ' == data12[1], "");
-_Static_assert('_' == data12[12], "");
-_Static_assert('\n' == data12[274], "");
-_Static_assert('\0' == data12[0], "");
-
-// expected-no-diagnostics
+static_assert(sizeof(data9) == 274);
+static_assert(' ' == data9[0]);
+static_assert('_' == data9[11]);
+static_assert('\n' == data9[273]);
+static_assert(sizeof(data10) == 275);
+static_assert(' ' == data10[1]);
+static_assert('_' == data10[12]);
+static_assert('\n' == data10[274]);
+static_assert('\0' == data10[0]);
+static_assert(sizeof(data11) == 275);
+static_assert(' ' == data11[1]);
+static_assert('_' == data11[12]);
+static_assert('\n' == data11[274]);
+static_assert('\0' == data11[0]);
+static_assert(sizeof(data12) == 275);
+static_assert(' ' == data12[1]);
+static_assert('_' == data12[12]);
+static_assert('\n' == data12[274]);
+static_assert('\0' == data12[0]);
diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c
new file mode 100644
index 000000000000000..f33236e3b46f340
--- /dev/null
+++ b/clang/test/Preprocessor/embed_ext_compat_diags.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify=none -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
+// RUN: %clang_cc1 -std=c17 %s -fsyntax-only -embed-dir=%S/Inputs -verify=ext -pedantic
+// RUN: %clang_cc1 -x c++ %s -fsyntax-only -embed-dir=%S/Inputs -verify=cxx -pedantic
+// none-no-diagnostics
+
+#if __has_embed("jk.txt") /* compat-warning {{'__has_embed' is incompatible with C standards before C23}}
+                             ext-warning {{'__has_embed' is a C23 extension}}
+                             cxx-warning {{'__has_embed' is a Clang extension}}
+                           */
+const char buffer[] = {
+#embed "jk.txt" /* compat-warning {{'#embed' is incompatible with C standards before C23}}
+                   ext-warning {{'#embed' is a C23 extension}}
+                   cxx-warning {{'#embed' is a Clang extension}}
+                 */
+};
+#endif
+
diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c
index 337fa4ac067ec71..472222aafa55a0d 100644
--- a/clang/test/Preprocessor/embed_file_not_found.c
+++ b/clang/test/Preprocessor/embed_file_not_found.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -std=c23 %s -E -verify
 
 #embed <nfejfNejAKFe>
 // expected-error at -1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
index cd517b7f216ac32..d1751c78ed6f8f0 100644
--- a/clang/test/Preprocessor/embed_init.c
+++ b/clang/test/Preprocessor/embed_init.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
 
 typedef struct kitty {
 	int purr;
@@ -21,8 +22,7 @@ const kitty_kitty kit_kit = {
 #embed <jk.txt>
 };
 
-_Static_assert(meow == 'b', "");
-_Static_assert(kit.purr == 'b', "");
-_Static_assert(kit_kit.here == 'j', "");
-_Static_assert(kit_kit.kit.purr == 'k', "");
-// expected-no-diagnostics
+static_assert(meow == 'b');
+static_assert(kit.purr == 'b');
+static_assert(kit_kit.here == 'j');
+static_assert(kit_kit.kit.purr == 'k');
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
index ac1a768b27ffff9..2f91ff363d3dbc1 100644
--- a/clang/test/Preprocessor/embed_parameter_if_empty.c
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <media/empty> if_empty(123, 124, 125)
@@ -6,11 +7,10 @@ const char data[] = {
 const char non_empty_data[] = {
 #embed <jk.txt> if_empty(123, 124, 125)
 };
-_Static_assert(sizeof(data) == 3, "");
-_Static_assert(123 == data[0], "");
-_Static_assert(124 == data[1], "");
-_Static_assert(125 == data[2], "");
-_Static_assert(sizeof(non_empty_data) == 2, "");
-_Static_assert('j' == non_empty_data[0], "");
-_Static_assert('k' == non_empty_data[1], "");
-// expected-no-diagnostics
+static_assert(sizeof(data) == 3);
+static_assert(123 == data[0]);
+static_assert(124 == data[1]);
+static_assert(125 == data[2]);
+static_assert(sizeof(non_empty_data) == 2);
+static_assert('j' == non_empty_data[0]);
+static_assert('k' == non_empty_data[1]);
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
index 28a94fe9430f033..186137953d1ce1c 100644
--- a/clang/test/Preprocessor/embed_parameter_limit.c
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <jk.txt>
@@ -6,10 +7,9 @@ const char data[] = {
 const char offset_data[] = {
 #embed <jk.txt> limit(1)
 };
-_Static_assert(sizeof(data) == 2, "");
-_Static_assert('j' == data[0], "");
-_Static_assert('k' == data[1], "");
-_Static_assert(sizeof(offset_data) == 1, "");
-_Static_assert('j' == offset_data[0], "");
-_Static_assert(offset_data[0] == data[0], "");
-// expected-no-diagnostics
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('j' == offset_data[0]);
+static_assert(offset_data[0] == data[0]);
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
index 71a029544dca556..a18c6fde01f6e6c 100644
--- a/clang/test/Preprocessor/embed_parameter_offset.c
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <jk.txt>
@@ -6,10 +7,9 @@ const char data[] = {
 const char offset_data[] = {
 #embed <jk.txt> clang::offset(1)
 };
-_Static_assert(sizeof(data) == 2, "");
-_Static_assert('j' == data[0], "");
-_Static_assert('k' == data[1], "");
-_Static_assert(sizeof(offset_data) == 1, "");
-_Static_assert('k' == offset_data[0], "");
-_Static_assert(offset_data[0] == data[1], "");
-// expected-no-diagnostics
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('k' == offset_data[0]);
+static_assert(offset_data[0] == data[1]);
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
index e806d6d4c877296..9b23c99d04a8fb3 100644
--- a/clang/test/Preprocessor/embed_parameter_prefix.c
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
@@ -8,11 +8,11 @@ const char empty_data[] = {
 #embed <media/empty> prefix('\xA', )
 1
 };
-_Static_assert(sizeof(data) == 2, "");
-_Static_assert('\xA' == data[0], "");
-_Static_assert('b' == data[1], "");
-_Static_assert(sizeof(empty_data) == 1, "");
-_Static_assert(1 == empty_data[0], "");
+static_assert(sizeof(data) == 2);
+static_assert('\xA' == data[0]);
+static_assert('b' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
 
 struct S {
   int x, y, z;
@@ -21,6 +21,6 @@ struct S {
 const struct S s = {
 #embed <single_byte.txt> prefix( .x = 100, .y = 10, )
 };
-_Static_assert(s.x == 100, "");
-_Static_assert(s.y == 10, "");
-_Static_assert(s.z == 'b', "");
+static_assert(s.x == 100);
+static_assert(s.y == 10);
+static_assert(s.z == 'b');
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
index 59c21d71750cdea..26e0fd2ca82aa38 100644
--- a/clang/test/Preprocessor/embed_parameter_suffix.c
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
@@ -8,11 +8,11 @@ const char empty_data[] = {
 #embed <media/empty> suffix(, '\xA')
 1
 };
-_Static_assert(sizeof(data) == 2, "");
-_Static_assert('b' == data[0], "");
-_Static_assert('\xA' == data[1], "");
-_Static_assert(sizeof(empty_data) == 1, "");
-_Static_assert(1 == empty_data[0], "");
+static_assert(sizeof(data) == 2);
+static_assert('b' == data[0]);
+static_assert('\xA' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
 
 struct S {
   int x, y, z;
@@ -22,6 +22,6 @@ const struct S s = {
 #embed <single_byte.txt> suffix( , .y = 100, .z = 10 )
 };
 
-_Static_assert(s.x == 'b', "");
-_Static_assert(s.y == 100, "");
-_Static_assert(s.z == 10, "");
+static_assert(s.x == 'b');
+static_assert(s.y == 100);
+static_assert(s.z == 10);
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
index 1f043ccd2ff54bf..aa8a8c41f607a9a 100644
--- a/clang/test/Preprocessor/embed_parameter_unrecognized.c
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 %s -std=c23 -E -verify
 
 #embed __FILE__ unrecognized
 // expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized' ignored}}
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
index 5c33871c0c8a4d8..a7200ca9794f7fd 100644
--- a/clang/test/Preprocessor/embed_path_chevron.c
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+// RUN: %clang_cc1 %s -std=c23 -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed <single_byte.txt>
 };
-_Static_assert(sizeof(data) == 1, "");
-_Static_assert('b' == data[0], "");
-// expected-no-diagnostics
+static_assert(sizeof(data) == 1);
+static_assert('b' == data[0]);
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index 7e39d9be3b0a523..58d6fd4b217db8f 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
 
 const char data[] = {
 #embed "single_byte.txt"
 };
-_Static_assert(sizeof(data) == 1, "");
-_Static_assert('b' == data[0], "");
-// expected-no-diagnostics
+static_assert(sizeof(data) == 1);
+static_assert('b' == data[0]);
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
index 5971a75ee000bbf..9709f9f329ec11c 100644
--- a/clang/test/Preprocessor/embed_weird.cpp
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
 #embed <media/empty>
 ;
 

>From 23eaf9898fcbefc96d9cbabae83f460ea06765dd Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 09:59:32 -0500
Subject: [PATCH 34/50] Fix formatting; NFC

---
 clang/lib/Lex/PPDirectives.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 19f49db6c6c61d3..0ba6b8238dfa797 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3909,7 +3909,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
     Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
   else
     Diag(EmbedTok, diag::ext_pp_embed_directive)
-        << (LangOpts.CPlusPlus ? /*Clang*/1 : /*C23*/0);
+        << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
 
   // Parse the filename header
   Token FilenameTok;

>From a10d9d6d38980fce60ba5e6861b392d245c10589 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 13:51:10 -0500
Subject: [PATCH 35/50] Fix the way we preprocess to a file

We now correctly emit the #embed directive in -dE mode, including all
parameters. We now also stop emitting internal tokens like the base64
encoded data when in regular -E mode.

This also updates the EmbedDirective() callback parameter list so that
information about the parameters is passed to callbacks.
---
 clang/include/clang/Lex/PPCallbacks.h         |  32 +---
 clang/include/clang/Lex/PPEmbedParameters.h   |  22 +++
 clang/include/clang/Lex/Preprocessor.h        |  23 ---
 clang/lib/Frontend/DependencyFile.cpp         |   8 +-
 clang/lib/Frontend/DependencyGraph.cpp        |  13 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  | 146 ++++++++++++++++--
 .../Frontend/Rewrite/InclusionRewriter.cpp    |  13 --
 clang/lib/Lex/PPDirectives.cpp                |  22 +--
 clang/lib/Lex/PPMacroExpansion.cpp            |   3 +-
 .../Preprocessor/embed_preprocess_to_file.c   |  41 ++++-
 10 files changed, 217 insertions(+), 106 deletions(-)

diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index 921bf159ead570d..c6b4c7fc224791a 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -27,6 +27,7 @@ namespace clang {
   class MacroDefinition;
   class MacroDirective;
   class MacroArgs;
+  struct LexEmbedParametersResult;
 
 /// This interface provides a way to observe the actions of the
 /// preprocessor as it does its thing.
@@ -104,25 +105,12 @@ class PPCallbacks {
   /// \param IsAngled Whether the file name was enclosed in angle brackets;
   /// otherwise, it was enclosed in quotes.
   ///
-  /// \param FilenameRange The character range of the quotes or angle brackets
-  /// for the written file name.
-  ///
-  /// \param ParametersRange The character range of the embed parameters. An
-  /// empty range if there were no parameters.
-  ///
   /// \param File The actual file that may be included by this embed directive.
   ///
-  /// \param SearchPath Contains the search path which was used to find the file
-  /// in the file system. If the file was found via an absolute path,
-  /// SearchPath will be empty.
-  ///
-  /// \param RelativePath The path relative to SearchPath, at which the resource
-  /// file was found. This is equal to FileName.
+  /// \param Params The parameters used by the directive.
   virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
-                              bool IsAngled, CharSourceRange FilenameRange,
-                              CharSourceRange ParametersRange,
-                              OptionalFileEntryRef File, StringRef SearchPath,
-                              StringRef RelativePath) {}
+                              bool IsAngled, OptionalFileEntryRef File,
+                              const LexEmbedParametersResult &Params) {}
 
   /// Callback invoked whenever the preprocessor cannot find a file for an
   /// inclusion directive.
@@ -515,14 +503,10 @@ class PPChainedCallbacks : public PPCallbacks {
   }
 
   void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-                      CharSourceRange FilenameRange,
-                      CharSourceRange ParametersRange,
-                      OptionalFileEntryRef File, StringRef SearchPath,
-                      StringRef RelativePath) override {
-    First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
-                          ParametersRange, File, SearchPath, RelativePath);
-    Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
-                           ParametersRange, File, SearchPath, RelativePath);
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override {
+    First->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
+    Second->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
   }
 
   bool FileNotFound(StringRef FileName) override {
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index f6de84bdc915148..96e1545eaf622e7 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -73,6 +73,28 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
+struct LexEmbedParametersResult {
+  std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+  std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+  std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+  std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+  std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
+  SourceLocation StartLoc;
+  SourceLocation EndLoc;
+  int UnrecognizedParams;
+  bool Successful;
+
+  size_t PrefixTokenCount() const {
+    if (MaybePrefixParam)
+      return MaybePrefixParam->Tokens.size();
+    return 0;
+  }
+  size_t SuffixTokenCount() const {
+    if (MaybeSuffixParam)
+      return MaybeSuffixParam->Tokens.size();
+    return 0;
+  }
+};
 } // end namespace clang
 
 #endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 0222e504a156c0c..92275ed27056cc0 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1736,29 +1736,6 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
-  struct LexEmbedParametersResult {
-    std::optional<PPEmbedParameterLimit> MaybeLimitParam;
-    std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
-    std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
-    std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
-    std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
-    SourceLocation StartLoc;
-    SourceLocation EndLoc;
-    int UnrecognizedParams;
-    bool Successful;
-
-    size_t PrefixTokenCount() const {
-      if (MaybePrefixParam)
-        return MaybePrefixParam->Tokens.size();
-      return 0;
-    }
-    size_t SuffixTokenCount() const {
-      if (MaybeSuffixParam)
-        return MaybeSuffixParam->Tokens.size();
-      return 0;
-    }
-  };
-
   LexEmbedParametersResult LexEmbedParameters(Token &Current,
                                               bool InHasEmbed = false,
                                               bool DiagnoseUnknown = true);
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index b46a16282f22c9c..10b1f146377ec11 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -62,11 +62,9 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
-  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-                      CharSourceRange FilenameRange,
-                      CharSourceRange ParametersRange,
-                      OptionalFileEntryRef File, StringRef SearchPath,
-                      StringRef RelativePath) override {
+  void EmbedDirective(SourceLocation, StringRef FileName, bool,
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override {
     if (!File)
       DepCollector.maybeAddDependency(FileName,
                                       /*FromModule*/ false,
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 4049a5245de7d34..efcaf21d1b49440 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -63,10 +63,8 @@ class DependencyGraphCallback : public PPCallbacks {
                           SrcMgr::CharacteristicKind FileType) override;
 
   void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-                      CharSourceRange FilenameRange,
-                      CharSourceRange ParametersRange,
-                      OptionalFileEntryRef File, StringRef SearchPath,
-                      StringRef RelativePath) override;
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override;
 
   void EndOfMainFile() override {
     OutputGraphFile();
@@ -104,10 +102,9 @@ void DependencyGraphCallback::InclusionDirective(
   AllFiles.insert(*FromFile);
 }
 
-void DependencyGraphCallback::EmbedDirective(
-    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
-    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+void DependencyGraphCallback::EmbedDirective(SourceLocation HashLoc, StringRef,
+                                             bool, OptionalFileEntryRef File,
+                                             const LexEmbedParametersResult &) {
   if ((Behavior & IgnoreEmbed) == IgnoreEmbed) {
     return;
   }
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 1d93ad97305da87..019043d3fa45b35 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
@@ -101,6 +102,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool KeepSystemIncludes;
   raw_ostream *OrigOS;
   std::unique_ptr<llvm::raw_null_ostream> NullOS;
+  unsigned NumToksToSkip;
 
   Token PrevTok;
   Token PrevPrevTok;
@@ -117,7 +119,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
         DumpEmbedDirectives(DumpEmbedDirectives),
         UseLineDirectives(UseLineDirectives),
         MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
-        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
+        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) {
     CurLine = 0;
     CurFilename += "<uninit>";
     EmittedTokensOnThisLine = false;
@@ -132,6 +134,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
     PrevPrevTok.startToken();
   }
 
+  /// Returns true if #embed directives should be expanded into a comma-
+  /// delimited list of integer constants, and false otherwise.
+  bool expandEmbedContents() const { return !DumpEmbedDirectives; }
+
   bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
 
   void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
@@ -153,10 +159,8 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
                    SrcMgr::CharacteristicKind FileType,
                    FileID PrevFID) override;
   void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-                      CharSourceRange FilenameRange,
-                      CharSourceRange ParametersRange,
-                      OptionalFileEntryRef File, StringRef SearchPath,
-                      StringRef RelativePath) override;
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -239,6 +243,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
   void BeginModule(const Module *M);
   void EndModule(const Module *M);
+
+  unsigned GetNumToksToSkip() const { return NumToksToSkip; }
+  void ResetSkipToks() { NumToksToSkip = 0; }
 };
 }  // end anonymous namespace
 
@@ -408,16 +415,72 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
 
 void PrintPPOutputPPCallbacks::EmbedDirective(
     SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
-    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
-  // In -dI mode, dump #include directives prior to dumping their content or
-  // interpretation.
-  if (DumpEmbedDirectives) {
-    MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
-    *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
-        << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
-    setEmittedDirectiveOnThisLine();
+    OptionalFileEntryRef File, const LexEmbedParametersResult &Params) {
+  if (!DumpEmbedDirectives)
+    return;
+
+  // The EmbedDirective() callback is called before we produce the annotation
+  // token stream for the directive. We skip printing the annotation tokens
+  // within PrintPreprocessedTokens(), but we also need to skip the prefix,
+  // suffix, and if_empty tokens as those are inserted directly into the token
+  // stream and would otherwise be printed immediately after printing the
+  // #embed directive.
+  //
+  // FIXME: counting tokens to skip is a kludge but we have no way to know
+  // which tokens were inserted as part of the embed and which ones were
+  // explicitly written by the user.
+  MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+  *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+      << (IsAngled ? '>' : '"');
+
+  auto PrintToks = [&](llvm::ArrayRef<Token> Toks) {
+    SmallString<128> SpellingBuffer;
+    for (const Token &T : Toks) {
+      if (T.hasLeadingSpace())
+        *OS << " ";
+      *OS << PP.getSpelling(T, SpellingBuffer);
+    }
+  };
+  bool SkipAnnotToks = true;
+  if (Params.MaybeIfEmptyParam) {
+    *OS << " if_empty(";
+    PrintToks(Params.MaybeIfEmptyParam->Tokens);
+    *OS << ")";
+    // If the file is empty, we can skip those tokens. If the file is not
+    // empty, we skip the annotation tokens.
+    if (File && !File->getSize()) {
+      NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size();
+      SkipAnnotToks = false;
+    }
+  }
+
+  if (Params.MaybeLimitParam) {
+    *OS << " limit(" << Params.MaybeLimitParam->Limit << ")";
+  }
+  if (Params.MaybeOffsetParam) {
+    *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")";
+  }
+  if (Params.MaybePrefixParam) {
+    *OS << " prefix(";
+    PrintToks(Params.MaybePrefixParam->Tokens);
+    *OS << ")";
+    NumToksToSkip += Params.MaybePrefixParam->Tokens.size();
+  }
+  if (Params.MaybeSuffixParam) {
+    *OS << " suffix(";
+    PrintToks(Params.MaybeSuffixParam->Tokens);
+    *OS << ")";
+    NumToksToSkip += Params.MaybeSuffixParam->Tokens.size();
   }
+
+  // This magic number comes from the number of tokens produced by
+  // Preprocessor::HandleEmbedDirectiveImpl(); if we start emitting more tokens
+  // while preprocessing, we will need to update this logic as well.
+  if (SkipAnnotToks)
+    NumToksToSkip += 7;
+
+  *OS << " /* clang -E -dE */";
+  setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::InclusionDirective(
@@ -899,6 +962,57 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       std::string Name = M->getFullModuleName();
       Callbacks->OS->write(Name.data(), Name.size());
       Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
+    } else if (Tok.is(tok::annot_embed_start)) {
+      // Manually explode the base64 encoded data out to a stream of comma-
+      // delimited integer values. If the user passed -dE, that is handled by
+      // the EmbedDirective() callback. We should only get here if the user did
+      // not pass -dE.
+      assert(Callbacks->expandEmbedContents() &&
+             "did not expect an embed annotation");
+      // Skip the start annotation token.
+      PP.Lex(Tok);
+
+      // Expand the contents of the file and hope for the best in terms of
+      // compile time performance. The first (few) tokens are type
+      // information; we will skip the explicit cast operations.
+      while (Tok.isOneOf(tok::kw_unsigned, tok::kw_char))
+        PP.Lex(Tok);
+
+      // Next is a string literal for the file name, which we can ignore.
+      assert(Tok.is(tok::string_literal) && "expected string literal token");
+      PP.Lex(Tok);
+
+      // Then we expect a comma followed by the string literal containing the
+      // binary contents.
+      assert(Tok.is(tok::comma) && "expected a comma token");
+      PP.Lex(Tok);
+      assert(Tok.is(tok::string_literal) && "expected string literal token");
+
+      std::vector<char> BinaryContents;
+      llvm::Error Err = llvm::decodeBase64(
+          StringRef(Tok.getLiteralData() + 1, Tok.getLength() - 2),
+          BinaryContents); // +1 and -2 are to skip quotation marks.
+      // We expect no errors because we're the one to generate the original
+      // contents.
+      assert(!Err && "expected no base64 decoding errors");
+
+      // Loop over the contents and print them as a comma-delimited list of
+      // values.
+      bool PrintComma = false;
+      for (auto Iter = BinaryContents.begin(), End = BinaryContents.end();
+            Iter != End; ++Iter) {
+        if (PrintComma)
+          *Callbacks->OS << ", ";
+        *Callbacks->OS << static_cast<unsigned>(
+            static_cast<unsigned char>(*Iter));
+        PrintComma = true;
+      }
+
+      // Finally, we expect the end annotation token.
+      PP.Lex(Tok);
+      assert(Tok.is(tok::annot_embed_end) &&
+              "expected the end of the embed directive");
+      IsStartOfLine = true;
     } else if (Tok.isAnnotation()) {
       // Ignore annotation tokens created by pragmas - the pragmas themselves
       // will be reproduced in the preprocessed output.
@@ -947,6 +1061,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
     if (Tok.is(tok::eof)) break;
 
     PP.Lex(Tok);
+    // If lexing that token causes us to need to skip future tokens, do so now.
+    for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I)
+      PP.Lex(Tok);
+    Callbacks->ResetSkipToks();
   }
 }
 
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index d7141f80286c2e3..2c3a253a67d5c93 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -71,11 +71,6 @@ class InclusionRewriter : public PPCallbacks {
                    FileID PrevFID) override;
   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
-  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
-                      CharSourceRange FilenameRange,
-                      CharSourceRange ParametersRange,
-                      OptionalFileEntryRef File, StringRef SearchPath,
-                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -182,14 +177,6 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
   LastInclusionLocation = SourceLocation();
 }
 
-/// This should be called whenever the preprocessor encounters embed
-/// directives.
-void InclusionRewriter::EmbedDirective(
-    SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/,
-    CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/,
-    OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/,
-    StringRef /*RelativePath*/) {}
-
 /// This should be called whenever the preprocessor encounters include
 /// directives. It does not say whether the file has been included, but it
 /// provides more information about the directive (hash location instead
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 0ba6b8238dfa797..aa7af037850f941 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3641,7 +3641,7 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
 
 enum class BracketType { Brace, Paren, Square };
 
-Preprocessor::LexEmbedParametersResult
+LexEmbedParametersResult
 Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
                                  bool DiagnoseUnknown) {
   LexEmbedParametersResult Result{};
@@ -3832,6 +3832,11 @@ void Preprocessor::HandleEmbedDirectiveImpl(
   // from, and the second string-literal is the base64 encoded data we loaded
   // from the file. The comma separation between string-literals prevents the
   // literals from combining into a single string literal.
+  //
+  // NOTE: if you change the token sequence, you will need to update
+  // Parser::ParseCastExpression() (the case for tok::annot_embed_start) as
+  // well as PrintPPOutputPPCallbacks::EmbedDirective() and
+  // PrintPreprocessedTokens() (the case for tok::annot_embed_start).
   auto EmitToks = [&](ArrayRef<Token> Toks) {
     size_t TokCount = Toks.size();
     auto NewToks = std::make_unique<Token[]>(TokCount);
@@ -3842,7 +3847,8 @@ void Preprocessor::HandleEmbedDirectiveImpl(
     // If we have no binary contents, the only thing we need to emit are the
     // if_empty tokens, if any.
     // FIXME: this loses AST fidelity; nothing in the compiler will see that
-    // these tokens came from #embed.
+    // these tokens came from #embed. We have to hack around this when printing
+    // preprocessed output. The same is true for prefix and suffix tokens.
     if (Params.MaybeIfEmptyParam)
       EmitToks(Params.MaybeIfEmptyParam->Tokens);
     return;
@@ -4005,15 +4011,9 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
            "definition";
     return;
   }
-  if (Callbacks) {
-    CharSourceRange FilenameSourceRange(
-        SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true);
-    CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc),
-                                    true);
-    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange,
-                              ParametersRange, MaybeFileRef, SearchPath,
-                              RelativePath);
-  }
+  if (Callbacks)
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
+                              Params);
   HandleEmbedDirectiveImpl(HashLoc, FilenameTok, Filename, SearchPath,
                            RelativePath, Params, BinaryContents,
                            TargetCharWidth);
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 79e521649c747fe..cbaac20d0c7f1b0 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1326,8 +1326,7 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   SourceLocation FilenameLoc = Tok.getLocation();
   Token FilenameTok = Tok;
 
-  Preprocessor::LexEmbedParametersResult Params =
-      this->LexEmbedParameters(Tok, true, false);
+  LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
   if (!Params.Successful) {
     if (Tok.isNot(tok::eod))
       this->DiscardUntilEndOfDirective();
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 314cd823e577044..4234866a26a7bd2 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -1,10 +1,39 @@
-// RUN: %clang_cc1 %s -std=c23 -E -embed-dir=%S/Inputs | FileCheck %s
+// RUN: %clang_cc1 -std=c23 %s -E -embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
+// RUN: %clang_cc1 -std=c23 %s -E -dE -embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
 
-// Ensure that we print out the correct data to the preprocessed file. Note,
-// #embed will do a base64 encoding of the file contents, so if art.txt changes,
-// this test will need to change accordingly as well.
+// Ensure that we correctly preprocess to a file, both with expanding embed
+// directives fully and with printing the directive instead.
 const char data[] = {
-#embed <media/art.txt>
+#embed <jk.txt> if_empty('a', 'b') clang::offset(0) limit(1) suffix(, 'a', 0) prefix('h',)
 };
 
-// CHECK: "{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg=="
+// EXPANDED: const char data[] = {'h',106, 'a', 0};
+// DIRECTIVE: const char data[] = {
+// DIRECTIVE-NEXT: #embed <jk.txt> if_empty('a', 'b') limit(1) clang::offset(0) prefix('h',) suffix(, 'a', 0) /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char more[] = {
+#embed <media/empty> if_empty('a', 'b')
+};
+
+// EXPANDED: const char more[] = {'a', 'b'}
+// DIRECTIVE: const char more[] = {
+// DIRECTIVE-NEXT: #embed <media/empty> if_empty('a', 'b') /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char even_more[] = {
+  1, 2, 3,
+#embed <jk.txt> prefix(4, 5,) suffix(, 6, 7)
+  , 8, 9, 10
+};
+
+// EXPANDED: const char even_more[] = {
+// EXPANDED-NEXT:   1, 2, 3,4, 5,106, 107, 6, 7 , 8, 9, 10
+// EXPANDED-EMPTY:
+// EXPANDED-EMPTY:
+// EXPANDED-NEXT: };
+// DIRECTIVE: const char even_more[] = {
+// DIRECTIVE-NEXT:  1, 2, 3,
+// DIRECTIVE-NEXT: #embed <jk.txt> prefix(4, 5,) suffix(, 6, 7) /* clang -E -dE */
+// DIRECTIVE-NEXT:  , 8, 9, 10
+// DIRECTIVE-NEXT: };

>From a24589980fef2e229eb95a75d4da4b64391af0ee Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 14:21:12 -0500
Subject: [PATCH 36/50] Fix formatting; NFC

---
 clang/lib/Frontend/PrintPreprocessedOutput.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 019043d3fa45b35..61ef7c2911b4066 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Frontend/Utils.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Frontend/Utils.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Pragma.h"
@@ -1000,7 +1000,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       // values.
       bool PrintComma = false;
       for (auto Iter = BinaryContents.begin(), End = BinaryContents.end();
-            Iter != End; ++Iter) {
+           Iter != End; ++Iter) {
         if (PrintComma)
           *Callbacks->OS << ", ";
         *Callbacks->OS << static_cast<unsigned>(
@@ -1011,7 +1011,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       // Finally, we expect the end annotation token.
       PP.Lex(Tok);
       assert(Tok.is(tok::annot_embed_end) &&
-              "expected the end of the embed directive");
+             "expected the end of the embed directive");
       IsStartOfLine = true;
     } else if (Tok.isAnnotation()) {
       // Ignore annotation tokens created by pragmas - the pragmas themselves

>From 00c6ff9baf9c2108ff256d25843663cbf4dd3e22 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 14:26:42 -0500
Subject: [PATCH 37/50] Attempt to appease clang-format; NFC

---
 clang/include/clang/Lex/PPCallbacks.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index c6b4c7fc224791a..5a83a26d4a8ee68 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -22,12 +22,12 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
-  class Token;
-  class IdentifierInfo;
-  class MacroDefinition;
-  class MacroDirective;
-  class MacroArgs;
-  struct LexEmbedParametersResult;
+class Token;
+class IdentifierInfo;
+class MacroDefinition;
+class MacroDirective;
+class MacroArgs;
+struct LexEmbedParametersResult;
 
 /// This interface provides a way to observe the actions of the
 /// preprocessor as it does its thing.

>From f36c95fe754047384dd0e21c93b6b97c9e85f9f2 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 15:10:21 -0500
Subject: [PATCH 38/50] Improve the __has_embed tests somewhat

This identified a potential issue with the clang::offset parameter,
which behaves in a somewhat surprising manner when combined with a
limit parameter. The test case now has a comment explaining the issue.
---
 clang/test/Preprocessor/embed___has_embed.c | 43 ++++++++++++++-------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
index d63a06372c6d1e4..e1cb1b2322ceec1 100644
--- a/clang/test/Preprocessor/embed___has_embed.c
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -1,34 +1,47 @@
 // RUN: %clang_cc1 -std=c23 %s -E -embed-dir=%S/Inputs -verify
 // expected-no-diagnostics
 
-#if !__has_embed(__FILE__)
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
 #error 1
-#elif !__has_embed("media/art.txt")
+#elif __has_embed("media/art.txt") != __STDC_EMBED_FOUND__
 #error 2
-#elif __has_embed("asdkasdjkadsjkdsfjk")
+#elif __has_embed("asdkasdjkadsjkdsfjk") != __STDC_EMBED_NOT_FOUND__
 #error 3
-#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1))
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) != __STDC_EMBED_NOT_FOUND__
 #error 4
-#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1))
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) != __STDC_EMBED_NOT_FOUND__
 #error 5
-#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD"))
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) != __STDC_EMBED_NOT_FOUND__
 #error 6
-#elif !__has_embed(__FILE__ limit(2) prefix(y))
+#elif __has_embed(__FILE__ limit(2) prefix(y)) != __STDC_EMBED_FOUND__
 #error 7
-#elif !__has_embed(__FILE__ limit(2))
+#elif __has_embed(__FILE__ limit(2)) != __STDC_EMBED_FOUND__
 #error 8
-#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x))
+// 6.10.1p7, if the search fails or any of the embed parameters in the embed
+// parameter sequence specified are not supported by the implementation for the
+// #embed directive;
+// We don't support one of the embed parameters.
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) != __STDC_EMBED_NOT_FOUND__
 #error 9
-#elif __has_embed(<media/empty>) != 2
+#elif __has_embed(<media/empty>) != __STDC_EMBED_EMPTY__
 #error 10
-#elif __has_embed(<media/empty> limit(0)) != 2
+// 6.10.1p7: if the search for the resource succeeds and all embed parameters
+// in the embed parameter sequence specified are supported by the
+// implementation for the #embed directive and the resource is empty
+// Limiting to zero characters means the resource is empty.
+#elif __has_embed(<media/empty> limit(0)) != __STDC_EMBED_EMPTY__
 #error 11
-#elif __has_embed(<media/art.txt> limit(0)) != 2
+#elif __has_embed(<media/art.txt> limit(0)) != __STDC_EMBED_EMPTY__
 #error 12
-#elif __has_embed(<media/art.txt> limit(1) clang::offset(1)) != 2
+// FIXME: It's debatable whether this test is correct or not; if we limit the
+// file to one character and then offset by one character, the file is empty.
+// But if we offset by one character and then limit the file to one character,
+// the file is not empty. We do not yet document this extension and so the
+// behavior of this might change.
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(1)) != __STDC_EMBED_EMPTY__
 #error 13
-#elif !__has_embed(<media/art.txt>)
+#elif __has_embed(<media/art.txt>) != __STDC_EMBED_FOUND__
 #error 14
-#elif !__has_embed(<media/art.txt> if_empty(meow))
+#elif __has_embed(<media/art.txt> if_empty(meow)) != __STDC_EMBED_FOUND__
 #error 14
 #endif

>From a76fa14e0dd39488059cf5f066e5d5a95d6f16c0 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 14 Nov 2023 15:14:18 -0500
Subject: [PATCH 39/50] Use named constants in test instead of magic values;
 NFC

---
 .../embed___has_embed_supported.c             | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
index f972762e256770e..9cee084f94f74b7 100644
--- a/clang/test/Preprocessor/embed___has_embed_supported.c
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -1,24 +1,24 @@
 // RUN: %clang_cc1 -std=c23 %s -E -verify
 
-#if !__has_embed(__FILE__)
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
 #error 1
-#elif !__has_embed(__FILE__)
+#elif __has_embed(__FILE__) != __STDC_EMBED_FOUND__
 #error 2
-#elif !__has_embed(__FILE__ suffix(x))
+#elif __has_embed(__FILE__ suffix(x)) != __STDC_EMBED_FOUND__
 #error 3
-#elif !__has_embed(__FILE__ suffix(x) limit(1))
+#elif __has_embed(__FILE__ suffix(x) limit(1)) != __STDC_EMBED_FOUND__
 #error 4
-#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1))
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1)) != __STDC_EMBED_FOUND__
 #error 5
-#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1))
+#elif __has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
 #error 6
-#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1))
+#elif __has_embed(__FILE__ suffix(x) limit(0) prefix(1)) != __STDC_EMBED_EMPTY__
 #error 7
-#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != __STDC_EMBED_EMPTY__
 #error 8
-#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != __STDC_EMBED_EMPTY__
 #error 9
-#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != __STDC_EMBED_EMPTY__
 #error 10
 #endif
 // expected-no-diagnostics

>From 1dcc44989a4982283b2631bd2256686f7473d20d Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Fri, 17 Nov 2023 13:13:34 -0500
Subject: [PATCH 40/50] Start reworking the way we represent a PPEmbedExpr

Add an iterator interface to PPEmbedExpr to iterate through the
contents of the resource as though they were IntegerLiteral AST nodes,
but without creating one AST node per byte in the resource.

We now iterate over those fake nodes from some of the recursive AST
visitors, like constant expression evaluation, so that we can remove
some kludges.
---
 clang/include/clang/AST/Expr.h                | 78 ++++++++++++++++++-
 clang/include/clang/AST/RecursiveASTVisitor.h |  6 +-
 .../clang/Basic/DiagnosticCommonKinds.td      |  3 -
 clang/include/clang/Sema/Sema.h               | 14 +---
 clang/lib/AST/Expr.cpp                        |  4 +-
 clang/lib/AST/ExprConstant.cpp                | 12 ++-
 clang/lib/AST/Interp/ByteCodeExprGen.cpp      |  9 +++
 clang/lib/AST/Interp/ByteCodeExprGen.h        |  1 +
 clang/lib/Sema/SemaDecl.cpp                   | 15 ++--
 clang/lib/Sema/SemaDeclCXX.cpp                |  2 -
 clang/lib/Sema/SemaExpr.cpp                   | 49 +++---------
 clang/test/Preprocessor/embed_constexpr.cpp   | 62 +++++++++++++++
 clang/test/Preprocessor/embed_init.c          |  1 +
 13 files changed, 182 insertions(+), 74 deletions(-)
 create mode 100644 clang/test/Preprocessor/embed_constexpr.cpp

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 6345faefa62ff26..5f3b94a86f687c5 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4814,8 +4814,10 @@ class SourceLocExpr final : public Expr {
 class PPEmbedExpr final : public Expr {
   SourceLocation BuiltinLoc, RParenLoc;
   DeclContext *ParentContext;
-  StringLiteral *Filename;
-  StringLiteral *BinaryData;
+  StringLiteral *Filename = nullptr;
+  StringLiteral *BinaryData = nullptr;
+  IntegerLiteral *FakeChildNode = nullptr;
+  const ASTContext *Ctx = nullptr;
 
 public:
   enum Action {
@@ -4845,12 +4847,82 @@ class PPEmbedExpr final : public Expr {
 
   size_t getDataElementCount(ASTContext &Context) const;
 
+private:
+  template <bool Const>
+  class ChildElementIter
+      : public llvm::iterator_facade_base<
+            // FIXME: it seems reasonable to make this a random access iterator
+            // instead, but all current access patterns are a linear walk over
+            // the contents, so it's being left for follow-up work if needed.
+            ChildElementIter<Const>, std::input_iterator_tag,
+            std::conditional_t<Const, const IntegerLiteral *,
+                               IntegerLiteral *>> {
+    friend class PPEmbedExpr;
+
+    const ASTContext *Ctx = nullptr;
+    IntegerLiteral *FakeNode = nullptr;
+    StringRef DataRef;
+    LLVM_PREFERRED_TYPE(bool) unsigned IsSigned : 1;
+    // FIXME: a sufficiently large embedded resource cannot be iterated over,
+    // woe unto such users.
+    unsigned CurOffset : 31;
+    using BaseTy = typename ChildElementIter::iterator_facade_base;
+
+    ChildElementIter(const PPEmbedExpr *E)
+        : IsSigned(E->getType()->isSignedIntegerType()), CurOffset(0) {
+      Ctx = E->Ctx;
+      FakeNode = E->FakeChildNode;
+      DataRef = E->BinaryData->getBytes();
+    }
+
+    // Max value that can be stored in a 31-bit bit-field.
+    static constexpr unsigned EndIterSentinel = 0x7FFFFFFF;
+
+  public:
+    ChildElementIter() : IsSigned(false), CurOffset(EndIterSentinel) {}
+    typename BaseTy::reference operator*() const {
+      assert(Ctx && FakeNode && CurOffset != EndIterSentinel &&
+             "trying to dereference an invalid iterator");
+      FakeNode->setValue(*Ctx, llvm::APInt(FakeNode->getValue().getBitWidth(),
+                                           DataRef[CurOffset], IsSigned));
+      return const_cast<typename BaseTy::reference>(FakeNode);
+    }
+    typename BaseTy::pointer operator->() const { return **this; }
+    using BaseTy::operator++;
+    ChildElementIter &operator++() {
+      assert(CurOffset != EndIterSentinel &&
+             "Already at the end of what we can iterate over");
+      if (++CurOffset >= DataRef.size()) {
+        CurOffset = EndIterSentinel;
+        FakeNode = nullptr;
+      }
+      return *this;
+    }
+    bool operator==(ChildElementIter Other) const {
+      return (FakeNode == Other.FakeNode && CurOffset == Other.CurOffset);
+    }
+  }; // class ChildElementIter
+
+public:
+  using fake_child_range = llvm::iterator_range<ChildElementIter<false>>;
+  using const_fake_child_range = llvm::iterator_range<ChildElementIter<true>>;
+
+  fake_child_range underlying_data_elements() {
+    return fake_child_range(ChildElementIter<false>(this),
+                            ChildElementIter<false>());
+  }
+
+  const_fake_child_range underlying_data_elements() const {
+    return const_fake_child_range(ChildElementIter<true>(this),
+                                  ChildElementIter<true>());
+  }
+
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
 
   const_child_range children() const {
-    return const_child_range(child_iterator(), child_iterator());
+    return const_child_range(const_child_iterator(), const_child_iterator());
   }
 
   static bool classof(const Stmt *T) {
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index fa0eefeb3fd56c1..d3f2568351bf8ad 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2809,7 +2809,11 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
 DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
 DEF_TRAVERSE_STMT(StmtExpr, {})
 DEF_TRAVERSE_STMT(SourceLocExpr, {})
-DEF_TRAVERSE_STMT(PPEmbedExpr, {})
+DEF_TRAVERSE_STMT(PPEmbedExpr, {
+  for (IntegerLiteral *IL : S->underlying_data_elements()) {
+    TRY_TO_TRAVERSE_OR_ENQUEUE_STMT(IL);
+  }
+})
 
 DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
   TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index e20e657b65b3e02..3ce836e858378fb 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -83,9 +83,6 @@ def err_expected : Error<"expected %0">;
 def err_expected_either : Error<"expected %0 or %1">;
 def err_expected_after : Error<"expected %1 after %0">;
 
-def err_builtin_pp_embed_invalid_location : Error<
-  "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">;
-
 def err_param_redefinition : Error<"redefinition of parameter %0">;
 def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">;
 def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 48eac54f36f676e..604145495ad80d5 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6095,7 +6095,8 @@ class Sema final {
                               SourceLocation RPLoc, StringLiteral *Filename,
                               QualType DataTy, std::vector<char> BinaryData);
 
-  IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed);
+  IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed,
+                                          bool FirstElement = true);
 
   PPEmbedExpr::Action
   CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
@@ -6107,17 +6108,6 @@ class Sema final {
   PPEmbedExpr::Action
   ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &OutputList);
 
-  enum PPEmbedExprContext {
-    PPEEC__StaticAssert,
-    PPEEC_StaticAssert,
-  };
-
-  StringRef GetLocationName(PPEmbedExprContext Context) const;
-
-  bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
-                           PPEmbedExprContext Context,
-                           bool SingleAllowed = true);
-
   // Build a potentially resolved SourceLocExpr.
   ExprResult BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
                                 SourceLocation BuiltinLoc, SourceLocation RPLoc,
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index ff3e4a725e6df09..abd55893c21cbff 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2351,8 +2351,10 @@ PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy,
                          DeclContext *ParentContext)
     : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary),
       BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext),
-      Filename(Filename), BinaryData(BinaryData) {
+      Filename(Filename), BinaryData(BinaryData), Ctx(&Ctx) {
   setDependence(ExprDependence::None);
+  FakeChildNode = IntegerLiteral::Create(
+      Ctx, llvm::APInt::getZero(Ctx.getTypeSize(ResultTy)), ResultTy, BLoc);
 }
 
 size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const {
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 273af98c4c75efd..6d8ff2173e191f4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7662,6 +7662,14 @@ class ExprEvaluatorBase
     return Error(E);
   }
 
+  bool VisitPPEmbedExpr(const PPEmbedExpr *E) {
+    for (const IntegerLiteral *IL : E->underlying_data_elements()) {
+      if (!StmtVisitorTy::Visit(IL))
+        return false;
+    }
+    return true;
+  }
+
   bool VisitPredefinedExpr(const PredefinedExpr *E) {
     return StmtVisitorTy::Visit(E->getFunctionName());
   }
@@ -16029,6 +16037,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
   case Expr::SizeOfPackExprClass:
   case Expr::GNUNullExprClass:
   case Expr::SourceLocExprClass:
+  case Expr::PPEmbedExprClass:
     return NoDiag();
 
   case Expr::SubstNonTypeTemplateParmExprClass:
@@ -16310,9 +16319,6 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
       return ICEDiag(IK_NotICE, E->getBeginLoc());
     return CheckICE(cast<CastExpr>(E)->getSubExpr(), Ctx);
   }
-  case Expr::PPEmbedExprClass: {
-    return ICEDiag(IK_ICE, E->getBeginLoc());
-  }
   }
 
   llvm_unreachable("Invalid StmtClass!");
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 70032cce2775148..365bd64354e7f1d 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -703,6 +703,15 @@ bool ByteCodeExprGen<Emitter>::VisitConstantExpr(const ConstantExpr *E) {
   return this->delegate(E->getSubExpr());
 }
 
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitPPEmbedExpr(const PPEmbedExpr *E) {
+  for (const IntegerLiteral *IL : E->underlying_data_elements()) {
+    if (!this->visit(IL))
+      return false;
+  }
+  return true;
+}
+
 static CharUnits AlignOfType(QualType T, const ASTContext &ASTCtx,
                              UnaryExprOrTypeTrait Kind) {
   bool AlignOfReturnsPreferred =
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 1c304caad5577ce..cbd9b38c36d5da4 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -107,6 +107,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
   bool VisitOffsetOfExpr(const OffsetOfExpr *E);
   bool VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E);
   bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
+  bool VisitPPEmbedExpr(const PPEmbedExpr *E);
 
 protected:
   bool visitExpr(const Expr *E) override;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 70dc6ad26abec27..df3b1ad528ea02c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13361,22 +13361,19 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     }
   }
 
-  if (!AlreadyAdjustedPPEmbedExpr) {
+  if (!AlreadyAdjustedPPEmbedExpr && Init) {
     // If there is a PPEmbedExpr as a single initializer without braces,
     // make sure it only produces a single element (and then expand said
     // element).
-    if (PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Init);
-        PPEmbed) {
+    if (PPEmbedExpr *PPEmbed = dyn_cast<PPEmbedExpr>(Init->IgnoreParens())) {
       if (PPEmbed->getDataElementCount(Context) == 1) {
         // Expand the list in-place immediately, let the natural work take hold
         Init = ExpandSinglePPEmbedExpr(PPEmbed);
       } else {
-        // #embed only produces 2 or more values.
-        // FIXME: still uses the old builtin name.
-        Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
-            << "'__builtin_pp_embed'";
-        RealDecl->setInvalidDecl();
-        return;
+        // Whee, this is a comma expression! However, we don't need to retain
+        // it as such because the comma expression results are the right-most
+        // operand. So we'll get that value and expand it as a single value.
+        Init = ExpandSinglePPEmbedExpr(PPEmbed, /*FirstElement*/ false);
       }
     }
 
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 349336e4c081b18..e93b96e68ffb0be 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17040,8 +17040,6 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
                                          SourceLocation RParenLoc) {
   if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
     return nullptr;
-  if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert))
-    return nullptr;
   return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
                                       AssertMessageExpr, RParenLoc, false);
 }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1833e3eaa9cbc59..dc89c7e69d747be 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17619,14 +17619,16 @@ ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
                   RPLoc, CurContext);
 }
 
-IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) {
-  assert(PPEmbed->getDataElementCount(Context) == 1 &&
+IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed,
+                                              bool FirstElement) {
+  assert((PPEmbed->getDataElementCount(Context) == 1 || !FirstElement) &&
          "Data should only contain a single element");
   StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
   QualType ElementTy = PPEmbed->getType();
   const size_t TargetWidth = Context.getTypeSize(ElementTy);
   const size_t BytesPerElement = CHAR_BIT / TargetWidth;
   StringRef Data = DataLiteral->getBytes();
+  Data = Data.substr(FirstElement ? 0 : Data.size() - 1, 1);
   SmallVector<uint64_t, 4> ByteVals{};
   for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
     if ((ValIndex % sizeof(uint64_t)) == 0) {
@@ -17647,9 +17649,10 @@ Sema::CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
   if (ExprList.empty()) {
     return PPEmbedExpr::NotFound;
   }
-  PPEmbedExpr *First = ExprList.size() == 1
-                           ? dyn_cast_if_present<PPEmbedExpr>(ExprList[0])
-                           : nullptr;
+  PPEmbedExpr *First =
+      ExprList.size() == 1
+          ? dyn_cast_if_present<PPEmbedExpr>(ExprList[0]->IgnoreParens())
+          : nullptr;
   if (First) {
     // only one and it's an embed
     if (MaybeInitType) {
@@ -17675,7 +17678,7 @@ Sema::CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
   }
   if (std::find_if(ExprList.begin(), ExprList.end(),
                    [](const Expr *const SomeExpr) {
-                     return isa<PPEmbedExpr>(SomeExpr);
+                     return isa<PPEmbedExpr>(SomeExpr->IgnoreParens());
                    }) == ExprList.end()) {
     // We didn't find one.
     return PPEmbedExpr::NotFound;
@@ -17786,40 +17789,6 @@ Sema::ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
   return PPEmbedExpr::Expanded;
 }
 
-StringRef Sema::GetLocationName(PPEmbedExprContext Context) const {
-  switch (Context) {
-  default:
-    llvm_unreachable("unhandled PPEmbedExprContext value");
-  case PPEEC__StaticAssert:
-    return "_Static_assert";
-  case PPEEC_StaticAssert:
-    return "static_assert";
-  }
-}
-
-bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
-                               PPEmbedExprContext PPEmbedContext,
-                               bool SingleAllowed) {
-  PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
-  if (!PPEmbed)
-    return false;
-
-  if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
-    E = ExpandSinglePPEmbedExpr(PPEmbed);
-    return false;
-  }
-
-  StringRef LocationName = GetLocationName(PPEmbedContext);
-  StringRef DiagnosticMessage =
-      (SingleAllowed ? "cannot use a preprocessor embed that expands to "
-                       "nothing or expands to "
-                       "more than one item in "
-                     : "cannot use a preprocessor embed in ");
-  Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
-      << DiagnosticMessage << 1 << LocationName;
-  return true;
-}
-
 bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,
                                         bool Diagnose) {
   if (!getLangOpts().ObjC)
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
new file mode 100644
index 000000000000000..572a55c6558cb68
--- /dev/null
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify=fixme -fexperimental-new-constant-interpreter -Wno-c23-extensions
+// expected-no-diagnostics
+
+constexpr int value(int a, int b) {
+  return a + b;
+}
+
+constexpr int func_call() {
+  return value(
+#embed <jk.txt>
+  );
+}
+
+constexpr int init_list_expr() {
+  int vals[] = {
+#embed <jk.txt>
+  };
+  return value(vals[0], vals[1]);
+}
+
+template <int N, int M>
+struct Hurr {
+  static constexpr int V1 = N;
+  static constexpr int V2 = M;
+};
+
+constexpr int template_args() {
+  Hurr<
+#embed <jk.txt>
+  > H;
+  return value(H.V1, H.V2);
+}
+
+constexpr int ExpectedValue = 'j' + 'k';
+static_assert(func_call() == ExpectedValue);
+static_assert(init_list_expr() == ExpectedValue);
+static_assert(template_args() == ExpectedValue); // fixme-error {{static assertion expression is not an integral constant expression}}
+
+static_assert(
+#embed <jk.txt> limit(1) suffix(== 'j')
+);
+
+int array[
+#embed <jk.txt> limit(1)
+];
+static_assert(sizeof(array) / sizeof(int) == 'j');
+
+constexpr int comma_expr = (
+#embed <jk.txt>
+);
+static_assert(comma_expr == 'k');
+
+constexpr int comma_expr_init_list{ (
+#embed <jk.txt> limit(1)
+) };
+static_assert(comma_expr_init_list == 'j');
+
+constexpr int paren_init(
+#embed <jk.txt> limit(1)
+);
+static_assert(paren_init == 'j');
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
index d1751c78ed6f8f0..60038b14e84494f 100644
--- a/clang/test/Preprocessor/embed_init.c
+++ b/clang/test/Preprocessor/embed_init.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
 // expected-no-diagnostics
 
 typedef struct kitty {

>From 8433fd5718d99fc15e8339ab4ea233183cbed5c5 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 11:08:46 -0500
Subject: [PATCH 41/50] Rename `-embed-dir` to `--embed-dir` and only accept
 the = form

---
 clang/include/clang/Driver/Options.td              | 6 +-----
 clang/lib/Driver/ToolChains/Clang.cpp              | 2 +-
 clang/lib/Frontend/CompilerInvocation.cpp          | 4 ++--
 clang/test/Preprocessor/embed___has_embed.c        | 2 +-
 clang/test/Preprocessor/embed_art.c                | 2 +-
 clang/test/Preprocessor/embed_constexpr.cpp        | 4 ++--
 clang/test/Preprocessor/embed_ext_compat_diags.c   | 8 ++++----
 clang/test/Preprocessor/embed_init.c               | 4 ++--
 clang/test/Preprocessor/embed_parameter_if_empty.c | 2 +-
 clang/test/Preprocessor/embed_parameter_limit.c    | 2 +-
 clang/test/Preprocessor/embed_parameter_offset.c   | 2 +-
 clang/test/Preprocessor/embed_parameter_prefix.c   | 2 +-
 clang/test/Preprocessor/embed_parameter_suffix.c   | 2 +-
 clang/test/Preprocessor/embed_path_chevron.c       | 2 +-
 clang/test/Preprocessor/embed_path_quote.c         | 2 +-
 clang/test/Preprocessor/embed_preprocess_to_file.c | 4 ++--
 clang/test/Preprocessor/embed_single_entity.c      | 2 +-
 clang/test/Preprocessor/embed_weird.cpp            | 4 ++--
 18 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ccd6bf616c69396..6773ce6258d0b9f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -830,11 +830,7 @@ will be ignored}]>;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     Visibility<[ClangOption, FlangOption]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
-def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
-    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option]>,
-    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
-def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
+def embed_dir_EQ : Joined<["--"], "embed-dir=">,
     Flags<[RenderJoined]>, Group<EmbedPath_Group>,
     Visibility<[ClangOption, CC1Option]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index ebceb9c2d61aa2f..c820b5ddaff3879 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8280,7 +8280,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
-  // Pass along any -embed-dir or similar options so we get proper embed paths.
+  // Pass along any --embed-dir or similar options so we get proper embed paths.
   Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group);
 
   // Determine the original source input.
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d1010504681b706..2319086bd0ff6d2 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4352,7 +4352,7 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
     GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch));
 
   for (const auto &EmbedEntry : Opts.EmbedEntries)
-    GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
+    GenerateArg(Consumer, OPT_embed_dir_EQ, EmbedEntry);
 
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
@@ -4446,7 +4446,7 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     }
   }
 
-  for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) {
+  for (const auto *A : Args.filtered(OPT_embed_dir_EQ)) {
     StringRef Val = A->getValue();
     Opts.EmbedEntries.push_back(std::string(Val));
   }
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
index e1cb1b2322ceec1..d1d3d11cc461cb4 100644
--- a/clang/test/Preprocessor/embed___has_embed.c
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -E -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs -verify
 // expected-no-diagnostics
 
 #if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
index 4b30bf41ab54f72..a664715091319f9 100644
--- a/clang/test/Preprocessor/embed_art.c
+++ b/clang/test/Preprocessor/embed_art.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
index 572a55c6558cb68..39b7398d4db6dc1 100644
--- a/clang/test/Preprocessor/embed_constexpr.cpp
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify -Wno-c23-extensions
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify=fixme -fexperimental-new-constant-interpreter -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=fixme -fexperimental-new-constant-interpreter -Wno-c23-extensions
 // expected-no-diagnostics
 
 constexpr int value(int a, int b) {
diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c
index f33236e3b46f340..2a4ac400a19acbb 100644
--- a/clang/test/Preprocessor/embed_ext_compat_diags.c
+++ b/clang/test/Preprocessor/embed_ext_compat_diags.c
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify=none -pedantic
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
-// RUN: %clang_cc1 -std=c17 %s -fsyntax-only -embed-dir=%S/Inputs -verify=ext -pedantic
-// RUN: %clang_cc1 -x c++ %s -fsyntax-only -embed-dir=%S/Inputs -verify=cxx -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=none -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
+// RUN: %clang_cc1 -std=c17 %s -fsyntax-only --embed-dir=%S/Inputs -verify=ext -pedantic
+// RUN: %clang_cc1 -x c++ %s -fsyntax-only --embed-dir=%S/Inputs -verify=cxx -pedantic
 // none-no-diagnostics
 
 #if __has_embed("jk.txt") /* compat-warning {{'__has_embed' is incompatible with C standards before C23}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
index 60038b14e84494f..79b1743703ac5b3 100644
--- a/clang/test/Preprocessor/embed_init.c
+++ b/clang/test/Preprocessor/embed_init.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
 // expected-no-diagnostics
 
 typedef struct kitty {
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
index 2f91ff363d3dbc1..e40c78e897d5bc6 100644
--- a/clang/test/Preprocessor/embed_parameter_if_empty.c
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
index 186137953d1ce1c..f952076a956c770 100644
--- a/clang/test/Preprocessor/embed_parameter_limit.c
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
index a18c6fde01f6e6c..72a8691a74a4017 100644
--- a/clang/test/Preprocessor/embed_parameter_offset.c
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
index 9b23c99d04a8fb3..77616fcdd966fa3 100644
--- a/clang/test/Preprocessor/embed_parameter_prefix.c
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
index 26e0fd2ca82aa38..280c9e0f9d11552 100644
--- a/clang/test/Preprocessor/embed_parameter_suffix.c
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
index a7200ca9794f7fd..b12cb9ceb54b8bd 100644
--- a/clang/test/Preprocessor/embed_path_chevron.c
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -std=c23 -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -std=c23 -fsyntax-only --embed-dir=%S/Inputs -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index 58d6fd4b217db8f..d62e81d3df7d9d6 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
 // expected-no-diagnostics
 
 const char data[] = {
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
index 4234866a26a7bd2..8358fca91660613 100644
--- a/clang/test/Preprocessor/embed_preprocess_to_file.c
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c23 %s -E -embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
-// RUN: %clang_cc1 -std=c23 %s -E -dE -embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
+// RUN: %clang_cc1 -std=c23 %s -E -dE --embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
 
 // Ensure that we correctly preprocess to a file, both with expanding embed
 // directives fully and with printing the directive instead.
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
index 8cbee2a93626152..945beda628e7890 100644
--- a/clang/test/Preprocessor/embed_single_entity.c
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -fsyntax-only -std=c23 -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify
 
 const char data =
 #embed "single_byte.txt"
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
index 9709f9f329ec11c..540a2b1741def65 100644
--- a/clang/test/Preprocessor/embed_weird.cpp
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify -Wno-c23-extensions
-// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
 #embed <media/empty>
 ;
 

>From d6c3c908df237d21646bf270e3650b1e32078a88 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 11:17:06 -0500
Subject: [PATCH 42/50] Remove the embed path group; it's not necessary

---
 clang/include/clang/Driver/Options.td | 12 +++---------
 clang/lib/Driver/ToolChains/Clang.cpp |  6 +-----
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 6773ce6258d0b9f..9523e32a5f8af69 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -114,11 +114,6 @@ def IncludePath_Group : OptionGroup<"<I/i group>">, Group<Preprocessor_Group>,
                         DocBrief<[{
 Flags controlling how ``#include``\s are resolved to files.}]>;
 
-def EmbedPath_Group : OptionGroup<"<Embed group>">, Group<Preprocessor_Group>,
-                        DocName<"Embed path management">,
-                        DocBrief<[{
-Flags controlling how ``#embed``\s and similar are resolved to files.}]>;
-
 def I_Group : OptionGroup<"<I group>">, Group<IncludePath_Group>, DocFlatten;
 def i_Group : OptionGroup<"<i group>">, Group<IncludePath_Group>, DocFlatten;
 def clang_i_Group : OptionGroup<"<clang i group>">, Group<i_Group>, DocFlatten;
@@ -830,10 +825,9 @@ will be ignored}]>;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     Visibility<[ClangOption, FlangOption]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
-def embed_dir_EQ : Joined<["--"], "embed-dir=">,
-    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option]>,
-    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
+def embed_dir_EQ : Joined<["--"], "embed-dir=">, Group<Preprocessor_Group>,
+    Visibility<[ClangOption, CC1Option]>, MetaVarName<"<dir>">,
+    HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;
 def MMD : Flag<["-"], "MMD">, Group<M_Group>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index c820b5ddaff3879..2d73f42772a29dc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1351,8 +1351,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map,
-                   options::OPT_EmbedPath_Group});
+                   options::OPT_F, options::OPT_index_header_map});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
@@ -8280,9 +8279,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
-  // Pass along any --embed-dir or similar options so we get proper embed paths.
-  Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group);
-
   // Determine the original source input.
   auto FindSource = [](const Action *S) -> const Action * {
     while (S->getKind() != Action::InputClass) {

>From 6abddb659f36db37c32f05ad98c3db9a5efdaa53 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 12:46:47 -0500
Subject: [PATCH 43/50] Replace some uses of string literals in diagnostics;
 NFC

---
 clang/lib/Lex/PPDirectives.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index aa7af037850f941..2cfad9f68b36900 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3655,7 +3655,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
     // Lex identifier [:: identifier ...]
     if (!CurTok.is(tok::identifier)) {
       Result.EndLoc = CurTok.getEndLoc();
-      Diag(CurTok, diag::err_expected) << "identifier";
+      Diag(CurTok, diag::err_expected) << tok::identifier;
       DiscardUntilEndOfDirective();
       return Result;
     }
@@ -3668,7 +3668,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       LexNonComment(CurTok);
       if (!CurTok.is(tok::identifier)) {
         Result.EndLoc = CurTok.getEndLoc();
-        Diag(CurTok, diag::err_expected) << "identifier";
+        Diag(CurTok, diag::err_expected) << tok::identifier;
         DiscardUntilEndOfDirective();
         return Result;
       }

>From 08004b85edbdbcb616df225112098f873a766f33 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 13:09:17 -0500
Subject: [PATCH 44/50] Simplify the implementation somewhat; NFC intended

---
 clang/lib/Lex/PPMacroExpansion.cpp | 34 +++++++++---------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index cbaac20d0c7f1b0..36e81c4595bd512 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1280,44 +1280,30 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     Diag(Tok, diag::ext_pp_has_embed)
         << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
 
-  // Save the location of the current token.  If a '(' is later found, use
-  // that location.  If not, use the end of this location instead.
-  SourceLocation LParenLoc = Tok.getLocation();
-
   // These expressions are only allowed within a preprocessor directive.
   if (!this->isParsingIfOrElifDirective()) {
-    Diag(LParenLoc, diag::err_pp_directive_required) << II;
+    Diag(Tok, diag::err_pp_directive_required) << II;
     // Return a valid identifier token.
     assert(Tok.is(tok::identifier));
     Tok.setIdentifierInfo(II);
     return EmbedResult::NotFound;
   }
 
-  // Get '('. If we don't have a '(', try to form a header-name token.
-  do {
-    if (this->LexHeaderName(Tok)) {
-      return EmbedResult::NotFound;
-    }
-  } while (Tok.getKind() == tok::comment);
-
   // Ensure we have a '('.
+  LexUnexpandedToken(Tok);
   if (Tok.isNot(tok::l_paren)) {
-    // No '(', use end of last token.
-    LParenLoc = this->getLocForEndOfToken(LParenLoc);
-    this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
+    Diag(Tok, diag::err_pp_expected_after) << II << tok::l_paren;
     // If the next token looks like a filename or the start of one,
     // assume it is and process it as such.
-    if (Tok.isNot(tok::header_name)) {
-      return EmbedResult::NotFound;
-    }
-  } else {
-    // Save '(' location for possible missing ')' message.
-    LParenLoc = Tok.getLocation();
-    if (this->LexHeaderName(Tok)) {
-      return EmbedResult::NotFound;
-    }
+    return EmbedResult::NotFound;
   }
 
+  // Save '(' location for possible missing ')' message and then lex the header
+  // name token for the embed resource.
+  SourceLocation LParenLoc = Tok.getLocation();
+  if (this->LexHeaderName(Tok))
+    return EmbedResult::NotFound;
+
   if (Tok.isNot(tok::header_name)) {
     Diag(Tok.getLocation(), diag::err_pp_expects_filename);
     return EmbedResult::NotFound;

>From 257fc01593af2b689b40059f05ab3053e75d628c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 13:56:12 -0500
Subject: [PATCH 45/50] Correct the way we look up files with __has_embed vs
 #embed

We were calling LookupEmbedFile() differently between the two uses and
that difference resulted in situations where __has_embed would return
__STDC_EMBED_FOUND__ and #embed would then say the file cannot be
found.

This also corrects the path handling for quoted lookup -- we would
previously find files from `--embed-dir=` before we would find files
next to the source file. Include path search heuristics are that quoted
header names are looked up starting from the directory containing the
source file.
---
 clang/include/clang/Lex/Preprocessor.h        | 11 ++-----
 clang/lib/Lex/PPDirectives.cpp                | 29 +++++--------------
 clang/lib/Lex/PPMacroExpansion.cpp            |  4 +--
 clang/test/Preprocessor/embed___has_embed.c   | 12 ++++++++
 clang/test/Preprocessor/embed_path_quote.c    |  2 +-
 clang/test/Preprocessor/embed_single_entity.c |  2 +-
 6 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 92275ed27056cc0..5e2d039c58cb3bb 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2431,10 +2431,8 @@ class Preprocessor {
   /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
   /// reference is for system \#include's or not (i.e. using <> instead of "").
   OptionalFileEntryRef
-  LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
-                  bool OpenFile, const FileEntry *LookupFromFile = nullptr,
-                  SmallVectorImpl<char> *SearchPath = nullptr,
-                  SmallVectorImpl<char> *RelativePath = nullptr);
+  LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+                  const FileEntry *LookupFromFile = nullptr);
 
   /// Return true if we're in the top-level file, not in a \#include.
   bool isInPrimaryFile() const;
@@ -2727,12 +2725,9 @@ class Preprocessor {
   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                             const FileEntry *LookupFromFile = nullptr);
   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
-                                const Token &FilenameTok,
                                 StringRef ResolvedFilename,
-                                StringRef SearchPath, StringRef RelativePath,
                                 const LexEmbedParametersResult &Params,
-                                StringRef BinaryContents,
-                                const size_t TargetCharWidth);
+                                StringRef BinaryContents);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 2cfad9f68b36900..d6f9574eb6d4f0a 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1077,10 +1077,9 @@ OptionalFileEntryRef Preprocessor::LookupFile(
   return std::nullopt;
 }
 
-OptionalFileEntryRef Preprocessor::LookupEmbedFile(
-    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
-    bool OpenFile, const FileEntry *LookupFromFile,
-    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath) {
+OptionalFileEntryRef
+Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+                              const FileEntry *LookupFromFile) {
   FileManager &FM = this->getFileManager();
   if (llvm::sys::path::is_absolute(Filename)) {
     // lookup path or immediately fail
@@ -1094,11 +1093,7 @@ OptionalFileEntryRef Preprocessor::LookupEmbedFile(
   // Non-angled lookup
   if (!isAngled) {
     bool TryLocalLookup = false;
-    if (SearchPath) {
-      // use the provided search path as the local lookup path
-      llvm::sys::path::native(*SearchPath, LookupPath);
-      TryLocalLookup = true;
-    } else if (LookupFromFile) {
+    if (LookupFromFile) {
       // Use file-based lookup here
       StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
       if (!FullFileDir.empty()) {
@@ -3816,10 +3811,8 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
 }
 
 void Preprocessor::HandleEmbedDirectiveImpl(
-    SourceLocation HashLoc, const Token &FilenameTok,
-    StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
-    const LexEmbedParametersResult &Params, StringRef BinaryContents,
-    const size_t TargetCharWidth) {
+    SourceLocation HashLoc, StringRef ResolvedFilename,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents) {
   // Pass off the annotation token stream. The parser expects:
   //   if_empty-tokens or
   //     prefix-tokens (if any)
@@ -3942,10 +3935,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
 
   // Now, splat the data out!
   SmallString<128> FilenameBuffer;
-  SmallString<512> SearchPath;
-  SmallString<512> RelativePath;
   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
-  SourceLocation FilenameLoc = FilenameTok.getLocation();
   StringRef OriginalFilename = Filename;
   bool isAngled =
       GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
@@ -3953,8 +3943,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   // error.
   assert(!Filename.empty());
   OptionalFileEntryRef MaybeFileRef =
-      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
-                            LookupFromFile, &SearchPath, &RelativePath);
+      this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
   if (!MaybeFileRef) {
     // could not find file
     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
@@ -4014,7 +4003,5 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   if (Callbacks)
     Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
                               Params);
-  HandleEmbedDirectiveImpl(HashLoc, FilenameTok, Filename, SearchPath,
-                           RelativePath, Params, BinaryContents,
-                           TargetCharWidth);
+  HandleEmbedDirectiveImpl(HashLoc, Filename, Params, BinaryContents);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 36e81c4595bd512..45e3fa14b922147 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1331,7 +1331,6 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   }
 
   SmallString<128> FilenameBuffer;
-  SmallString<256> RelativePath;
   StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
   StringRef OriginalFilename = Filename;
   bool isAngled =
@@ -1343,8 +1342,7 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
       this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
                                   : static_cast<FileEntry *>(nullptr);
   OptionalFileEntryRef MaybeFileEntry =
-      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
-                            LookupFromFile, nullptr, &RelativePath);
+      this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
   if (Callbacks) {
     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
   }
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
index d1d3d11cc461cb4..00acd7c4fa057be 100644
--- a/clang/test/Preprocessor/embed___has_embed.c
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -45,3 +45,15 @@
 #elif __has_embed(<media/art.txt> if_empty(meow)) != __STDC_EMBED_FOUND__
 #error 14
 #endif
+
+// Ensure that when __has_embed returns true, the file can actually be
+// embedded. This was previously failing because the way in which __has_embed
+// would search for files was different from how #embed would resolve them
+// when the file path included relative path markers like `./` or `../`.
+#if __has_embed("./embed___has_embed.c") == __STDC_EMBED_FOUND__
+unsigned char buffer[] = {
+#embed "./embed___has_embed.c"
+};
+#else
+#error 15
+#endif
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index d62e81d3df7d9d6..79ca1e5c811b813 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -5,4 +5,4 @@ const char data[] = {
 #embed "single_byte.txt"
 };
 static_assert(sizeof(data) == 1);
-static_assert('b' == data[0]);
+static_assert('a' == data[0]);
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
index 945beda628e7890..2019118b48d322d 100644
--- a/clang/test/Preprocessor/embed_single_entity.c
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify
 
 const char data =
-#embed "single_byte.txt"
+#embed <single_byte.txt>
 ;
 _Static_assert('b' == data);
 // expected-no-diagnostics

>From 137961addf411b619eeebda7b40c38aa2b0e80f8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 27 Nov 2023 14:39:41 -0500
Subject: [PATCH 46/50] Test and fix the behavior for dependency file
 generation

This adds back some changes from d6c3c908df237d21646bf270e3650b1e32078a88
that were necessary after all, but were untested.

Both __has_embed and #embed will contribute to the dependency file.
---
 clang/lib/Driver/ToolChains/Clang.cpp        |  6 +++++-
 clang/lib/Frontend/DependencyFile.cpp        | 21 ++++++++++----------
 clang/test/Preprocessor/embed_dependencies.c | 20 +++++++++++++++++++
 3 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Preprocessor/embed_dependencies.c

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 2d73f42772a29dc..c6eb26ee943bc2b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1351,7 +1351,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map});
+                   options::OPT_F, options::OPT_index_header_map,
+                   options::OPT_embed_dir_EQ});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
@@ -8279,6 +8280,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
+  // Pass along any --embed-dir or similar options so we get proper embed paths.
+  Args.AddAllArgs(CmdArgs, options::OPT_embed_dir_EQ);
+
   // Determine the original source input.
   auto FindSource = [](const Action *S) -> const Action * {
     while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 10b1f146377ec11..5786c71824c8ac4 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -62,16 +62,17 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
-  void EmbedDirective(SourceLocation, StringRef FileName, bool,
+  void EmbedDirective(SourceLocation, StringRef, bool,
                       OptionalFileEntryRef File,
-                      const LexEmbedParametersResult &Params) override {
-    if (!File)
-      DepCollector.maybeAddDependency(FileName,
-                                      /*FromModule*/ false,
-                                      /*IsSystem*/ false,
-                                      /*IsModuleFile*/ false,
-                                      /*IsMissing*/ true);
-    // Files that actually exist are handled by FileChanged.
+                      const LexEmbedParametersResult &) override {
+    assert(File && "expected to only be called when the file is found");
+    StringRef FileName =
+        llvm::sys::path::remove_leading_dotslash(File->getName());
+    DepCollector.maybeAddDependency(FileName,
+                                    /*FromModule*/ false,
+                                    /*IsSystem*/ false,
+                                    /*IsModuleFile*/ false,
+                                    /*IsMissing*/ false);
   }
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
@@ -88,7 +89,7 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
     // Files that actually exist are handled by FileChanged.
   }
 
-  void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
+  void HasEmbed(SourceLocation, StringRef, bool,
                 OptionalFileEntryRef File) override {
     if (!File)
       return;
diff --git a/clang/test/Preprocessor/embed_dependencies.c b/clang/test/Preprocessor/embed_dependencies.c
new file mode 100644
index 000000000000000..4e00dc79ac190b4
--- /dev/null
+++ b/clang/test/Preprocessor/embed_dependencies.c
@@ -0,0 +1,20 @@
+// RUN: %clang %s -fsyntax-only -std=c23 -M --embed-dir=%S/Inputs -Xclang -verify | FileCheck %s
+
+// Yes this looks very strange indeed, but the goal is to test that we add
+// files referenced by both __has_embed and #embed when we generate
+// dependencies, so we're trying to see that both of these files are in the
+// output.
+#if __has_embed(<jk.txt>)
+const char data =
+#embed "Inputs/single_byte.txt"
+;
+_Static_assert('b' == data);
+#else
+#error "oops"
+#endif
+// expected-no-diagnostics
+
+// CHECK: embed_dependencies.c \
+// CHECK-NEXT: jk.txt \
+// CHECK-NEXT: Inputs{{[/\\]}}single_byte.txt
+

>From a91723c67e2032bee9dd29df7f96b5e0d6145d07 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 29 Nov 2023 14:58:07 -0500
Subject: [PATCH 47/50] Remove changes for clang-format

The changes weren't correct and the clang-format folks are in a better
position to lead this effort anyway. It will be handled in a follow-up.
---
 clang/lib/Format/FormatToken.h      |  2 --
 clang/lib/Format/TokenAnnotator.cpp | 27 ---------------------------
 2 files changed, 29 deletions(-)

diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index aad126cdcff28e7..14a3c21ba44eaee 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -1012,7 +1012,6 @@ struct AdditionalKeywords {
     kw_synchronized = &IdentTable.get("synchronized");
     kw_throws = &IdentTable.get("throws");
     kw___except = &IdentTable.get("__except");
-    kw___has_embed = &IdentTable.get("__has_embed");
     kw___has_include = &IdentTable.get("__has_include");
     kw___has_include_next = &IdentTable.get("__has_include_next");
 
@@ -1310,7 +1309,6 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_NS_ERROR_ENUM;
   IdentifierInfo *kw_NS_OPTIONS;
   IdentifierInfo *kw___except;
-  IdentifierInfo *kw___has_embed;
   IdentifierInfo *kw___has_include;
   IdentifierInfo *kw___has_include_next;
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 542d9d91c5b5f2e..bffb59f5d1175d3 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1418,8 +1418,6 @@ class AnnotatingParser {
       if (Tok->isOneOf(Keywords.kw___has_include,
                        Keywords.kw___has_include_next)) {
         parseHasInclude();
-      } else if (Tok->is(Keywords.kw___has_embed)) {
-        parseHasEmbed();
       }
       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
           Tok->Next->isNot(tok::l_paren)) {
@@ -1483,21 +1481,6 @@ class AnnotatingParser {
     }
   }
 
-  void parseEmbedDirective() {
-    if (CurrentToken && CurrentToken->is(tok::less)) {
-      next();
-      while (CurrentToken) {
-        // Mark tokens up to the trailing line comments as implicit string
-        // literals.
-        if (CurrentToken->isNot(tok::comment) &&
-            !CurrentToken->TokenText.startswith("//")) {
-          CurrentToken->setType(TT_ImplicitStringLiteral);
-        }
-        next();
-      }
-    }
-  }
-
   void parseWarningOrError() {
     next();
     // We still want to format the whitespace left of the first token of the
@@ -1534,14 +1517,6 @@ class AnnotatingParser {
     next(); // ')'
   }
 
-  void parseHasEmbed() {
-    if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
-      return;
-    next(); // '('
-    parseEmbedDirective();
-    next(); // ')'
-  }
-
   LineType parsePreprocessorDirective() {
     bool IsFirstToken = CurrentToken->IsFirst;
     LineType Type = LT_PreprocessorDirective;
@@ -1605,8 +1580,6 @@ class AnnotatingParser {
       } else if (Tok->isOneOf(Keywords.kw___has_include,
                               Keywords.kw___has_include_next)) {
         parseHasInclude();
-      } else if (Tok->is(Keywords.kw___has_embed)) {
-        parseHasEmbed();
       }
     }
     return Type;

>From 0d6ea7d405af7c1f9482ed31278f6c3d9659c00b Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 30 Nov 2023 10:29:57 -0500
Subject: [PATCH 48/50] No longer pass around base64 encoded data

Instead, pass the binary data. If we want to add support for something
like #embed_base64, we can do so by taking the base64 encoded data from
the directive and decoding it into a binary blob and pass it off like
we would for #embed.

These changes also remove diagnostics related to CHAR_BIT != 8; we do
not support any such targets currently and so that code is untestable.
---
 clang/include/clang/AST/Expr.h                |  2 +-
 .../clang/Basic/DiagnosticCommonKinds.td      |  3 -
 clang/include/clang/Sema/Sema.h               |  4 +-
 clang/lib/AST/Expr.cpp                        | 11 ++-
 clang/lib/AST/StmtPrinter.cpp                 |  1 -
 .../lib/Frontend/PrintPreprocessedOutput.cpp  | 12 +--
 clang/lib/Lex/PPDirectives.cpp                | 24 ++----
 clang/lib/Parse/ParseExpr.cpp                 | 80 +++++++------------
 clang/lib/Sema/SemaExpr.cpp                   | 16 +---
 llvm/include/llvm/Support/Base64.h            | 36 ++++-----
 10 files changed, 64 insertions(+), 125 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 5f3b94a86f687c5..3e5a3d4ade8c7c8 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4826,7 +4826,7 @@ class PPEmbedExpr final : public Expr {
     Expanded,
   };
 
-  PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename,
+  PPEmbedExpr(const ASTContext &Ctx, StringLiteral *Filename,
               StringLiteral *BinaryData, SourceLocation BLoc,
               SourceLocation RParenLoc, DeclContext *Context);
 
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index ef1f36f0de387ac..65a33f61a6948ad 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -59,9 +59,6 @@ def err_expected_string_literal : Error<"expected string literal "
           "'external_source_symbol' attribute|"
           "as argument of '%1' attribute}0">;
 
-def err_builtin_pp_embed_invalid_argument : Error<
-  "invalid argument to '__builtin_pp_embed': %0">;
-
 def err_invalid_string_udl : Error<
   "string literal with user-defined suffix cannot be used here">;
 def err_invalid_character_udl : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index f4789a66f93a06d..418ec7b19b386e3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6105,9 +6105,9 @@ class Sema final {
 
   // #embed
   ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
-                              SourceLocation Base64DataLocation,
+                              SourceLocation BinaryDataLoc,
                               SourceLocation RPLoc, StringLiteral *Filename,
-                              QualType DataTy, std::vector<char> BinaryData);
+                              StringLiteral *BinaryData);
 
   IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed,
                                           bool FirstElement = true);
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index abd55893c21cbff..880913f37c446fb 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2345,16 +2345,15 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
   llvm_unreachable("unhandled case");
 }
 
-PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy,
-                         StringLiteral *Filename, StringLiteral *BinaryData,
-                         SourceLocation BLoc, SourceLocation RParenLoc,
-                         DeclContext *ParentContext)
-    : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary),
+PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, StringLiteral *Filename,
+                         StringLiteral *BinaryData, SourceLocation BLoc,
+                         SourceLocation RParenLoc, DeclContext *ParentContext)
+    : Expr(PPEmbedExprClass, Ctx.UnsignedCharTy, VK_PRValue, OK_Ordinary),
       BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext),
       Filename(Filename), BinaryData(BinaryData), Ctx(&Ctx) {
   setDependence(ExprDependence::None);
   FakeChildNode = IntegerLiteral::Create(
-      Ctx, llvm::APInt::getZero(Ctx.getTypeSize(ResultTy)), ResultTy, BLoc);
+      Ctx, llvm::APInt::getZero(Ctx.getTypeSize(getType())), getType(), BLoc);
 }
 
 size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const {
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index fa429ed7c7a0493..f611975331021dc 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -49,7 +49,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Base64.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 61ef7c2911b4066..58a65f13ddce1a9 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Base64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
@@ -477,7 +476,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective(
   // Preprocessor::HandleEmbedDirectiveImpl(); if we start emitting more tokens
   // while preprocessing, we will need to update this logic as well.
   if (SkipAnnotToks)
-    NumToksToSkip += 7;
+    NumToksToSkip += 5;
 
   *OS << " /* clang -E -dE */";
   setEmittedDirectiveOnThisLine();
@@ -988,13 +987,8 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       PP.Lex(Tok);
       assert(Tok.is(tok::string_literal) && "expected string literal token");
 
-      std::vector<char> BinaryContents;
-      llvm::Error Err = llvm::decodeBase64(
-          StringRef(Tok.getLiteralData() + 1, Tok.getLength() - 2),
-          BinaryContents); // +1 and -2 are to skip quotation marks.
-      // We expect no errors because we're the one to generate the original
-      // contents.
-      assert(!Err && "expected no base64 decoding errors");
+      // +1 and -2 are to skip quotation marks.
+      StringRef BinaryContents(Tok.getLiteralData() + 1, Tok.getLength() - 2);
 
       // Loop over the contents and print them as a comma-delimited list of
       // values.
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index d6f9574eb6d4f0a..08d4c59bc9a0e92 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -44,7 +44,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
-#include "llvm/Support/Base64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -3817,14 +3816,13 @@ void Preprocessor::HandleEmbedDirectiveImpl(
   //   if_empty-tokens or
   //     prefix-tokens (if any)
   //     embed-annotation-start
-  //       type-name string-literal , string-literal
+  //       string-literal , string-literal
   //     embed-annotation-stop
   //     suffix-tokens (if any)
-  // where the type-name is the type used for each element to embed, the first
-  // string-literal is the resolved file name of the file we loaded contents
-  // from, and the second string-literal is the base64 encoded data we loaded
-  // from the file. The comma separation between string-literals prevents the
-  // literals from combining into a single string literal.
+  // where the first string-literal is the resolved file name of the file we
+  // loaded contents from, and the second string-literal is the binary data we
+  // loaded from the file. The comma separation between string-literals
+  // prevents the literals from combining into a single string literal.
   //
   // NOTE: if you change the token sequence, you will need to update
   // Parser::ParseCastExpression() (the case for tok::annot_embed_start) as
@@ -3860,7 +3858,7 @@ void Preprocessor::HandleEmbedDirectiveImpl(
 
   size_t NumPrefixToks = Params.PrefixTokenCount(),
          NumSuffixToks = Params.SuffixTokenCount();
-  size_t TotalNumToks = 7 + NumPrefixToks + NumSuffixToks;
+  size_t TotalNumToks = 5 + NumPrefixToks + NumSuffixToks;
   size_t CurIdx = 0;
   auto Toks = std::make_unique<Token[]>(TotalNumToks);
 
@@ -3873,21 +3871,13 @@ void Preprocessor::HandleEmbedDirectiveImpl(
   // Now annotate the embed itself.
   SetAnnotTok(Toks[CurIdx++], tok::annot_embed_start, HashLoc);
 
-  Toks[CurIdx].startToken();
-  Toks[CurIdx].setLocation(HashLoc);
-  Toks[CurIdx++].setKind(tok::kw_unsigned);
-
-  Toks[CurIdx].startToken();
-  Toks[CurIdx].setLocation(HashLoc);
-  Toks[CurIdx++].setKind(tok::kw_char);
-
   SetStrTok(Toks[CurIdx++], ResolvedFilename, HashLoc);
 
   Toks[CurIdx].startToken();
   Toks[CurIdx].setLocation(HashLoc);
   Toks[CurIdx++].setKind(tok::comma);
 
-  SetStrTok(Toks[CurIdx++], llvm::encodeBase64(BinaryContents), HashLoc);
+  SetStrTok(Toks[CurIdx++], BinaryContents, HashLoc);
 
   SetAnnotTok(Toks[CurIdx++], tok::annot_embed_end, HashLoc);
 
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index c6e327e2a01cc8f..702eb4e19f4f49d 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -32,7 +32,6 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Base64.h"
 #include <optional>
 using namespace clang;
 
@@ -1057,69 +1056,46 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
     // The preprocessor has already validated the syntax of the #embed
     // directive and has produced this series of tokens, so we do not need to
     // check for syntactic correctness. The form will be:
-    //    type-name string-literal , string-literal
+    //    string-literal , string-literal
     //
-    // where the type-name is the type of the elements to embed, the first
-    // string-literal is the file name the user passed to the directive, and
-    // the second string-literal is base64 encoded data from that file.
+    // where the first string-literal is the resolved name of the file whose
+    // contents were loaded, and the second string-literal is the binary data
+    // from that file.
     SourceLocation StartLoc = ConsumeAnnotationToken();
     SourceRange DataTyExprSourceRange;
-    TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
-    ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
+    ExprResult FilenameArgExpr = ParseUnevaluatedStringLiteralExpression();
     // There is a comma separating the string literals to prevent them from
     // combining into a single string literal.
     ExpectAndConsume(tok::comma);
-    ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
-
-    const ASTContext &Context = Actions.getASTContext();
-    QualType DataTy = DataTyExpr.get().get().getCanonicalType();
-    size_t TargetWidth = Context.getTypeSize(DataTy);
-    if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
-        DataTy.getUnqualifiedType() != Context.CharTy) {
-      // TODO: check if is exactly the same as unsigned char
-      Diag(DataTyExprSourceRange.getBegin(),
-           diag::err_builtin_pp_embed_invalid_argument)
-          << "only 'char' and 'unsigned char' are supported";
-      Res = ExprError();
-    }
-    if ((TargetWidth % CHAR_BIT) != 0) {
-      Diag(DataTyExprSourceRange.getBegin(),
-           diag::err_builtin_pp_embed_invalid_argument)
-          << "width of element type is not a multiple of host platform's "
-             "CHAR_BIT!";
-      Res = ExprError();
-    }
-
-    StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
-    std::vector<char> BinaryData;
-    StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
-    if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
-      Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
-          << 0
-          << "'__builtin_pp_embed' with valid base64 encoding that is an "
-             "ordinary \"...\" string";
-    }
-    const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
-      Diag(Base64Str->getExprLoc(), diag::err_builtin_pp_embed_invalid_argument)
-          << "expected a valid base64 encoded string";
-    };
-    llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
-    llvm::handleAllErrors(std::move(Err), OnDecodeError);
-    if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
-      Diag(DataTyExprSourceRange.getBegin(),
-           diag::err_builtin_pp_embed_invalid_argument)
-          << "size of data does not split evently into the number of bytes "
-             "requested";
-      Res = ExprError();
-    }
+    // We need a real string literal expression and not an unevaluated one
+    // because this string literal may be used by list initialization, which
+    // asserts that it's not given an unevaluated string literal. So we will
+    // parse the unevaluated string literal and cook it into a real literal
+    // that we can use. We cannot parse an actual string literal expression
+    // because that leaves us with two problems with the string's type: 1) the
+    // element type will be char and not unsigned char, 2) the array type will
+    // account for the null terminator but the source data is not null
+    // terminated because it's not a real string literal.
+    ExprResult BinaryData = ParseUnevaluatedStringLiteralExpression();
+    StringLiteral *UnevalBinData = BinaryData.getAs<StringLiteral>();
+    ASTContext &Context = Actions.getASTContext();
+    uint64_t ArraySizeRawVal[] = {UnevalBinData->getByteLength()};
+    llvm::APSInt ArraySize(llvm::APInt(
+        Context.getTypeSize(Context.getSizeType()), 1, ArraySizeRawVal));
+    QualType ArrayTy =
+        Context.getConstantArrayType(Context.UnsignedCharTy, ArraySize, nullptr,
+                                     ArraySizeModifier::Normal, 0);
+    StringLiteral *BinaryDataLiteral = StringLiteral::Create(
+        Context, UnevalBinData->getBytes(), StringLiteralKind::Ordinary, false,
+        ArrayTy, UnevalBinData->getExprLoc());
 
     // Now we expect the end annotation token.
     assert(Tok.is(tok::annot_embed_end));
     SourceLocation EndLoc = ConsumeAnnotationToken();
     if (!Res.isInvalid()) {
       Res = Actions.ActOnPPEmbedExpr(
-          StartLoc, Base64ArgExpr.get()->getExprLoc(), EndLoc, FilenameLiteral,
-          DataTy, std::move(BinaryData));
+          StartLoc, BinaryData.get()->getExprLoc(), EndLoc,
+          FilenameArgExpr.getAs<StringLiteral>(), BinaryDataLiteral);
     }
   } break;
 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e4a19b864b06345..cef5f93b9f28a55 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17606,21 +17606,11 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
 }
 
 ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
-                                  SourceLocation Base64DataLocation,
+                                  SourceLocation BinaryDataLoc,
                                   SourceLocation RPLoc, StringLiteral *Filename,
-                                  QualType ElementTy,
-                                  std::vector<char> BinaryData) {
-  uint64_t ArraySizeRawVal[] = {BinaryData.size()};
-  llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
-                                     1, ArraySizeRawVal));
-  QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
-                                                  ArraySizeModifier::Normal, 0);
-  StringLiteral *BinaryDataLiteral = StringLiteral::Create(
-      Context, StringRef(BinaryData.data(), BinaryData.size()),
-      StringLiteralKind::Ordinary, false, ArrayTy, Base64DataLocation);
+                                  StringLiteral *BinaryData) {
   return new (Context)
-      PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
-                  RPLoc, CurContext);
+      PPEmbedExpr(Context, Filename, BinaryData, BuiltinLoc, RPLoc, CurContext);
 }
 
 IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed,
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 8fcef706e916733..3d96884749b32f4 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -20,43 +20,37 @@
 
 namespace llvm {
 
-template <class InputBytes, class OutputContainer>
-void encodeBase64(InputBytes const &Bytes, OutputContainer &OutputBuffer) {
+template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
   static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "abcdefghijklmnopqrstuvwxyz"
                               "0123456789+/";
-  const std::size_t IndexOffset = OutputBuffer.size();
-  OutputBuffer.resize(OutputBuffer.size() + (((Bytes.size() + 2) / 3) * 4));
+  std::string Buffer;
+  Buffer.resize(((Bytes.size() + 2) / 3) * 4);
 
   size_t i = 0, j = 0;
   for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) {
     uint32_t x = ((unsigned char)Bytes[i] << 16) |
                  ((unsigned char)Bytes[i + 1] << 8) |
                  (unsigned char)Bytes[i + 2];
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
-    OutputBuffer[IndexOffset + j + 3] = Table[x & 63];
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = Table[(x >> 6) & 63];
+    Buffer[j + 3] = Table[x & 63];
   }
   if (i + 1 == Bytes.size()) {
     uint32_t x = ((unsigned char)Bytes[i] << 16);
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = '=';
-    OutputBuffer[IndexOffset + j + 3] = '=';
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = '=';
+    Buffer[j + 3] = '=';
   } else if (i + 2 == Bytes.size()) {
     uint32_t x =
         ((unsigned char)Bytes[i] << 16) | ((unsigned char)Bytes[i + 1] << 8);
-    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
-    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
-    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
-    OutputBuffer[IndexOffset + j + 3] = '=';
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = Table[(x >> 6) & 63];
+    Buffer[j + 3] = '=';
   }
-}
-
-template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
-  std::string Buffer;
-  encodeBase64(Bytes, Buffer);
   return Buffer;
 }
 

>From fbb0ff203ffc4379a3212b2a11c30bb375676f90 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 30 Nov 2023 11:09:16 -0500
Subject: [PATCH 49/50] Fix the diagnostic group for unknown directives

---
 clang/include/clang/Basic/DiagnosticGroups.td   | 1 -
 clang/include/clang/Basic/DiagnosticLexKinds.td | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 067a9a96e0c6d16..091f056a8f7f4dc 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -713,7 +713,6 @@ def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>
 def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
 def FinalMacro : DiagGroup<"final-macro">;
 // Warnings about unknown preprocessor parameters (e.g. `#embed` and extensions)
-def UnsupportedDirective : DiagGroup<"unsupported-directive">;
 def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">;
 def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">;
 def DirectiveParameters : DiagGroup<"directive-parameters",
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 97d3856b4fc92e9..807c34e72308f78 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -493,8 +493,7 @@ def ext_pp_gnu_line_directive : Extension<
 def err_pp_invalid_directive : Error<
   "invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
 def warn_pp_invalid_directive : Warning<
-  err_pp_invalid_directive.Summary>,
-  InGroup<UnsupportedDirective>;
+  err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
 def warn_pp_unknown_parameter_ignored : Warning<
   "unknown%select{ | embed}0 preprocessor parameter '%1' ignored">,
   InGroup<UnknownDirectiveParameters>;

>From 250ec6be63d63f38cee1f7ee7cbf342cbab456fa Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 30 Nov 2023 11:18:07 -0500
Subject: [PATCH 50/50] Remove a newly-added unused diagnostic group, add test
 coverage

---
 clang/include/clang/Basic/DiagnosticGroups.td          | 5 -----
 clang/include/clang/Basic/DiagnosticLexKinds.td        | 2 +-
 clang/test/Preprocessor/embed_parameter_unrecognized.c | 2 ++
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 091f056a8f7f4dc..ff028bbbf74261e 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -712,11 +712,6 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">;
 def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>;
 def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
 def FinalMacro : DiagGroup<"final-macro">;
-// Warnings about unknown preprocessor parameters (e.g. `#embed` and extensions)
-def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">;
-def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">;
-def DirectiveParameters : DiagGroup<"directive-parameters",
-    [UnknownDirectiveParameters, IgnoredDirectiveParameters]>;
 
 // Just silence warnings about -Wstrict-aliasing for now.
 def : DiagGroup<"strict-aliasing=0">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 807c34e72308f78..2449b0f83bea8f4 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -496,7 +496,7 @@ def warn_pp_invalid_directive : Warning<
   err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
 def warn_pp_unknown_parameter_ignored : Warning<
   "unknown%select{ | embed}0 preprocessor parameter '%1' ignored">,
-  InGroup<UnknownDirectiveParameters>;
+  InGroup<DiagGroup<"unknown-directive-parameters">>;
 def err_pp_unsupported_directive : Error<
   "unsupported%select{ | embed}0 directive: %1">;
 def err_pp_directive_required : Error<
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
index aa8a8c41f607a9a..4b41695907b5f22 100644
--- a/clang/test/Preprocessor/embed_parameter_unrecognized.c
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 %s -std=c23 -E -verify
+// RUN: %clang_cc1 %s -std=c23 -E -Wno-unknown-directive-parameters -verify=okay
+// okay-no-diagnostics
 
 #embed __FILE__ unrecognized
 // expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized' ignored}}



More information about the cfe-commits mailing list