[clang] [compiler-rt] [flang] [libcxx] [llvm] [libc] [clang-tools-extra] [lldb] ✨ [Sema, Lex, Parse] Preprocessor embed in C and C++ (and Obj-C and Obj-C++ by-proxy) (PR #68620)

Aaron Ballman via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 9 05:13:25 PST 2023


https://github.com/AaronBallman updated https://github.com/llvm/llvm-project/pull/68620

>From 7050c932f63f9cb9e94636b287887f8241083117 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Thu, 28 Sep 2023 18:31:34 -0400
Subject: [PATCH 01/23] =?UTF-8?q?=E2=9C=A8=20[Sema,=20Driver,=20Lex,=20Fro?=
 =?UTF-8?q?ntend]=20Implement=20naive=20#embed=20for=20C23=20and=20C++26.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🛠 [Frontend] Ensure commas inserted by #embed are properly serialized to output
---
 clang/CMakeLists.txt                          |   3 +-
 clang/include/clang/Basic/Builtins.def        |   3 +
 clang/include/clang/Basic/DiagnosticGroups.td |   6 +
 .../include/clang/Basic/DiagnosticLexKinds.td |  24 +-
 clang/include/clang/Basic/FileManager.h       |   8 +-
 clang/include/clang/Basic/TokenKinds.def      |   7 +
 clang/include/clang/Driver/Options.td         |  16 +
 .../Frontend/PreprocessorOutputOptions.h      |   2 +
 clang/include/clang/Lex/PPCallbacks.h         |  77 ++-
 clang/include/clang/Lex/Preprocessor.h        |  66 ++-
 clang/include/clang/Lex/PreprocessorOptions.h |   7 +
 clang/lib/Basic/FileManager.cpp               |   8 +-
 clang/lib/Basic/IdentifierTable.cpp           |   3 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   5 +-
 clang/lib/Format/FormatToken.h                |   2 +
 clang/lib/Format/TokenAnnotator.cpp           |  28 +
 clang/lib/Frontend/CompilerInvocation.cpp     |  19 +
 clang/lib/Frontend/DependencyFile.cpp         |  29 +
 clang/lib/Frontend/DependencyGraph.cpp        |  43 +-
 clang/lib/Frontend/InitPreprocessor.cpp       |   7 +
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  25 +-
 .../Frontend/Rewrite/InclusionRewriter.cpp    |  13 +
 clang/lib/Lex/PPCallbacks.cpp                 |  11 -
 clang/lib/Lex/PPDirectives.cpp                | 500 ++++++++++++++++++
 clang/lib/Lex/PPExpressions.cpp               |  44 +-
 clang/lib/Lex/PPMacroExpansion.cpp            | 120 +++++
 clang/test/Preprocessor/Inputs/jk.txt         |   1 +
 clang/test/Preprocessor/Inputs/media/art.txt  |   9 +
 clang/test/Preprocessor/Inputs/media/empty    |   0
 .../test/Preprocessor/Inputs/single_byte.txt  |   1 +
 clang/test/Preprocessor/embed___has_embed.c   |  34 ++
 .../embed___has_embed_supported.c             |  24 +
 .../test/Preprocessor/embed_feature_test.cpp  |  13 +
 .../test/Preprocessor/embed_file_not_found.c  |   4 +
 clang/test/Preprocessor/embed_init.c          |  28 +
 .../Preprocessor/embed_parameter_if_empty.c   |  16 +
 .../test/Preprocessor/embed_parameter_limit.c |  15 +
 .../Preprocessor/embed_parameter_offset.c     |  15 +
 .../Preprocessor/embed_parameter_prefix.c     |  15 +
 .../Preprocessor/embed_parameter_suffix.c     |  15 +
 .../embed_parameter_unrecognized.c            |   8 +
 clang/test/Preprocessor/embed_path_chevron.c  |   8 +
 clang/test/Preprocessor/embed_path_quote.c    |   8 +
 clang/test/Preprocessor/single_byte.txt       |   1 +
 llvm/CMakeLists.txt                           |   7 +
 llvm/cmake/modules/GetHostTriple.cmake        |   6 +-
 46 files changed, 1264 insertions(+), 40 deletions(-)
 create mode 100644 clang/test/Preprocessor/Inputs/jk.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/empty
 create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt
 create mode 100644 clang/test/Preprocessor/embed___has_embed.c
 create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c
 create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp
 create mode 100644 clang/test/Preprocessor/embed_file_not_found.c
 create mode 100644 clang/test/Preprocessor/embed_init.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c
 create mode 100644 clang/test/Preprocessor/embed_path_chevron.c
 create mode 100644 clang/test/Preprocessor/embed_path_quote.c
 create mode 100644 clang/test/Preprocessor/single_byte.txt

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 9b52c58be41e7f7..1b88905da3b8597 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,6 +300,7 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
 
 # Add appropriate flags for GCC
+option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
   if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -307,7 +308,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   endif ()
 
   # Enable -pedantic for Clang even if it's not enabled for LLVM.
-  if (NOT LLVM_ENABLE_PEDANTIC)
+  if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
   endif ()
 
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 6ea8484606cfd5d..0dfc6456daf059a 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1766,6 +1766,9 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
 // Arithmetic Fence: to prevent FP reordering and reassociation optimizations
 LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
 
+// preprocessor embed builtin
+LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
+
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 0b09c002191848a..89f6715cebfdc0d 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -708,6 +708,12 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">;
 def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>;
 def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
 def FinalMacro : DiagGroup<"final-macro">;
+// Warnings about unknown preprocessor parameters (e.g. `#embed` and extensions)
+def UnsupportedDirective : DiagGroup<"unsupported-directive">;
+def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">;
+def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">;
+def DirectiveParameters : DiagGroup<"directive-parameters",
+    [UnknownDirectiveParameters, IgnoredDirectiveParameters]>;
 
 // Just silence warnings about -Wstrict-aliasing for now.
 def : DiagGroup<"strict-aliasing=0">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 940cca67368492f..4490f40806b0345 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -422,6 +422,22 @@ def warn_cxx23_compat_warning_directive : Warning<
 def warn_c23_compat_warning_directive : Warning<
   "#warning is incompatible with C standards before C23">,
   InGroup<CPre23Compat>, DefaultIgnore;
+def warn_c23_pp_embed : Warning<
+  "#embed is a C23 extension">,
+  InGroup<CPre23Compat>,
+  DefaultIgnore;
+def warn_c23_pp_has_embed : Warning<
+  "'__has_embed' is a C23 extension">,
+  InGroup<CPre23Compat>,
+  DefaultIgnore;
+def warn_cxx26_pp_embed : Warning<
+  "#embed is a C++26 extension">,
+  InGroup<CXXPre26Compat>,
+  DefaultIgnore;
+def warn_cxx26_pp_has_embed : Warning<
+  "'__has_embed' is a C++26 extension">,
+  InGroup<CXXPre26Compat>,
+  DefaultIgnore;
 
 def ext_pp_extra_tokens_at_eol : ExtWarn<
   "extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
@@ -483,7 +499,13 @@ def ext_pp_gnu_line_directive : Extension<
 def err_pp_invalid_directive : Error<
   "invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
 def warn_pp_invalid_directive : Warning<
-  err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
+  err_pp_invalid_directive.Summary>,
+  InGroup<UnsupportedDirective>;
+def warn_pp_unknown_parameter_ignored : Warning<
+  "unknown%select{| embed}0 preprocessor parameter '%1' ignored">,
+  InGroup<UnknownDirectiveParameters>;
+def err_pp_unsupported_directive : Error<
+  "unsupported%select{| embed}0 directive: %1">;
 def err_pp_directive_required : Error<
   "%0 must be used within a preprocessing directive">;
 def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal;
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 56cb093dd8c376f..c757f8775b425e9 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -276,11 +276,13 @@ class FileManager : public RefCountedBase<FileManager> {
   /// MemoryBuffer if successful, otherwise returning null.
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(FileEntryRef Entry, bool isVolatile = false,
-                   bool RequiresNullTerminator = true);
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt);
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(StringRef Filename, bool isVolatile = false,
-                   bool RequiresNullTerminator = true) {
-    return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt) {
+    return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
                                 RequiresNullTerminator);
   }
 
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 94db56a9fd5d78c..19a66fbb0731194 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -126,6 +126,9 @@ PPKEYWORD(error)
 // C99 6.10.6 - Pragma Directive.
 PPKEYWORD(pragma)
 
+// C23 & C++26 #embed
+PPKEYWORD(embed)
+
 // GNU Extensions.
 PPKEYWORD(import)
 PPKEYWORD(include_next)
@@ -151,6 +154,10 @@ TOK(eod)                 // End of preprocessing directive (end of line inside a
                          // directive).
 TOK(code_completion)     // Code completion marker
 
+// #embed speed support
+TOK(builtin_embed)
+
+
 // C99 6.4.9: Comments.
 TOK(comment)             // Comment (only in -E -C[C] mode)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3f2058a5d4650ca..a77a1a5e9aad981 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -114,6 +114,11 @@ def IncludePath_Group : OptionGroup<"<I/i group>">, Group<Preprocessor_Group>,
                         DocBrief<[{
 Flags controlling how ``#include``\s are resolved to files.}]>;
 
+def EmbedPath_Group : OptionGroup<"<Embed group>">, Group<Preprocessor_Group>,
+                        DocName<"Embed path management">,
+                        DocBrief<[{
+Flags controlling how ``#embed``\s and similar are resolved to files.}]>;
+
 def I_Group : OptionGroup<"<I group>">, Group<IncludePath_Group>, DocFlatten;
 def i_Group : OptionGroup<"<i group>">, Group<IncludePath_Group>, DocFlatten;
 def clang_i_Group : OptionGroup<"<clang i group>">, Group<i_Group>, DocFlatten;
@@ -816,6 +821,14 @@ will be ignored}]>;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     Visibility<[ClangOption, FlangOption]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
+def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
+    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
+def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
+    Flags<[RenderJoined]>, Group<EmbedPath_Group>,
+    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;
 def MMD : Flag<["-"], "MMD">, Group<M_Group>,
@@ -1353,6 +1366,9 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
 def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Print include directives in -E mode in addition to normal output">,
   MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
+def dE : Flag<["-"], "dE">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Print embed directives in -E mode in addition to normal output">,
+  MarshallingInfoFlag<PreprocessorOutputOpts<"ShowEmbedDirectives">>;
 def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Print macro definitions in -E mode instead of normal output">;
 def dead__strip : Flag<["-"], "dead_strip">;
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index db2ec9f2ae20698..3e36db3f8ce46ea 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,6 +22,7 @@ class PreprocessorOutputOptions {
   unsigned ShowMacroComments : 1;  ///< Show comments, even in macros.
   unsigned ShowMacros : 1;         ///< Print macro definitions.
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
+  unsigned ShowEmbedDirectives : 1;  ///< Print embeds, etc. within preprocessed output.
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
   unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
@@ -37,6 +38,7 @@ class PreprocessorOutputOptions {
     ShowMacroComments = 0;
     ShowMacros = 0;
     ShowIncludeDirectives = 0;
+    ShowEmbedDirectives = 0;
     RewriteIncludes = 0;
     RewriteImports = 0;
     MinimizeWhitespace = 0;
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index 94f96cf9c512541..921bf159ead570d 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -83,6 +83,47 @@ class PPCallbacks {
                            const Token &FilenameTok,
                            SrcMgr::CharacteristicKind FileType) {}
 
+  /// Callback invoked whenever the preprocessor cannot find a file for an
+  /// embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \returns true to indicate that the preprocessor should skip this file
+  /// and not issue any diagnostic.
+  virtual bool EmbedFileNotFound(StringRef FileName) { return false; }
+
+  /// Callback invoked whenever an embed directive has been processed,
+  /// regardless of whether the embed will actually find a file.
+  ///
+  /// \param HashLoc The location of the '#' that starts the embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \param IsAngled Whether the file name was enclosed in angle brackets;
+  /// otherwise, it was enclosed in quotes.
+  ///
+  /// \param FilenameRange The character range of the quotes or angle brackets
+  /// for the written file name.
+  ///
+  /// \param ParametersRange The character range of the embed parameters. An
+  /// empty range if there were no parameters.
+  ///
+  /// \param File The actual file that may be embedded by this embed directive.
+  ///
+  /// \param SearchPath Contains the search path which was used to find the file
+  /// in the file system. If the file was found via an absolute path,
+  /// SearchPath will be empty.
+  ///
+  /// \param RelativePath The path relative to SearchPath, at which the resource
+  /// file was found. This is equal to FileName.
+  virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+                              bool IsAngled, CharSourceRange FilenameRange,
+                              CharSourceRange ParametersRange,
+                              OptionalFileEntryRef File, StringRef SearchPath,
+                              StringRef RelativePath) {}
+
   /// Callback invoked whenever the preprocessor cannot find a file for an
   /// inclusion directive.
   ///
@@ -330,11 +371,15 @@ class PPCallbacks {
                        SourceRange Range) {
   }
 
+  /// Hook called when a '__has_embed' directive is read.
+  virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                        OptionalFileEntryRef File) {}
+
   /// Hook called when a '__has_include' or '__has_include_next' directive is
   /// read.
   virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
                           OptionalFileEntryRef File,
-                          SrcMgr::CharacteristicKind FileType);
+                          SrcMgr::CharacteristicKind FileType) {}
 
   /// Hook called when a source range is skipped.
   /// \param Range The SourceRange that was skipped. The range begins at the
@@ -461,6 +506,25 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->FileSkipped(SkippedFile, FilenameTok, FileType);
   }
 
+  bool EmbedFileNotFound(StringRef FileName) override {
+    bool Skip = First->EmbedFileNotFound(FileName);
+    // Forward to the embed-specific hook (not FileNotFound). Always invoke the
+    // second callback too, even if the first already asked to skip the file.
+    Skip |= Second->EmbedFileNotFound(FileName);
+    return Skip;
+  }
+
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
+    First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+                          ParametersRange, File, SearchPath, RelativePath);
+    Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange,
+                           ParametersRange, File, SearchPath, RelativePath);
+  }
+
   bool FileNotFound(StringRef FileName) override {
     bool Skip = First->FileNotFound(FileName);
     // Make sure to invoke the second callback, no matter if the first already
@@ -561,9 +625,18 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->PragmaDiagnostic(Loc, Namespace, mapping, Str);
   }
 
+  void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                OptionalFileEntryRef File) override {
+    First->HasEmbed(Loc, FileName, IsAngled, File);
+    Second->HasEmbed(Loc, FileName, IsAngled, File);
+  }
+
   void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
                   OptionalFileEntryRef File,
-                  SrcMgr::CharacteristicKind FileType) override;
+                  SrcMgr::CharacteristicKind FileType) override {
+    First->HasInclude(Loc, FileName, IsAngled, File, FileType);
+    Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
+  }
 
   void PragmaOpenCLExtension(SourceLocation NameLoc, const IdentifierInfo *Name,
                              SourceLocation StateLoc, unsigned State) override {
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 18d88407ae12c90..7470bf5882730cb 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -31,6 +31,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -53,6 +54,7 @@
 #include <optional>
 #include <string>
 #include <utility>
+#include <variant>
 #include <vector>
 
 namespace llvm {
@@ -165,6 +167,7 @@ class Preprocessor {
   IdentifierInfo *Ident__has_builtin;              // __has_builtin
   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
   IdentifierInfo *Ident__has_attribute;            // __has_attribute
+  IdentifierInfo *Ident__has_embed;                // __has_embed
   IdentifierInfo *Ident__has_include;              // __has_include
   IdentifierInfo *Ident__has_include_next;         // __has_include_next
   IdentifierInfo *Ident__has_warning;              // __has_warning
@@ -206,7 +209,10 @@ class Preprocessor {
 
   enum {
     /// Maximum depth of \#includes.
-    MaxAllowedIncludeStackDepth = 200
+    MaxAllowedIncludeStackDepth = 200,
+    VALUE__STDC_EMBED_NOT_FOUND__ = 0,
+    VALUE__STDC_EMBED_FOUND__ = 1,
+    VALUE__STDC_EMBED_EMPTY__ = 2,
   };
 
   // State that is set before the preprocessor begins.
@@ -1728,6 +1734,22 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
+  struct LexEmbedParametersResult {
+    bool Successful;
+    std::optional<size_t> MaybeLimitParam;
+    std::optional<size_t> MaybeOffsetParam;
+    std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
+    std::optional<SmallVector<Token, 2>> MaybePrefixParam;
+    std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
+    int UnrecognizedParams;
+    SourceLocation StartLoc;
+    SourceLocation EndLoc;
+  };
+
+  LexEmbedParametersResult LexEmbedParameters(Token &Current,
+                                              bool InHasEmbed = false,
+                                              bool DiagnoseUnknown = true);
+
   bool LexAfterModuleImport(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
@@ -2413,6 +2435,17 @@ class Preprocessor {
              bool *IsFrameworkFound, bool SkipCache = false,
              bool OpenFile = true, bool CacheFailures = true);
 
+  /// Given a "foo" or \<foo> reference, look up the indicated embed resource.
+  ///
+  /// Returns std::nullopt on failure.  \p isAngled indicates whether the
+  /// resource name was written with <> rather than "" in the embed directive.
+  OptionalFileEntryRef
+  LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+                  bool OpenFile,
+                  const FileEntry *LookupFromFile = nullptr,
+                  SmallVectorImpl<char> *SearchPath = nullptr,
+                  SmallVectorImpl<char> *RelativePath = nullptr);
+
   /// Return true if we're in the top-level file, not in a \#include.
   bool isInPrimaryFile() const;
 
@@ -2517,6 +2550,9 @@ class Preprocessor {
   /// Information about the result for evaluating an expression for a
   /// preprocessor directive.
   struct DirectiveEvalResult {
+    /// The integral value of the expression.
+    std::optional<llvm::APSInt> Value;
+
     /// Whether the expression was evaluated as true or not.
     bool Conditional;
 
@@ -2531,7 +2567,24 @@ class Preprocessor {
   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
   ///
   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
-  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  bool CheckForEoD = true,
+                                                  bool Parenthesized = false);
+
+  /// Evaluate an integer constant expression that may occur after a
+  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
+  ///
+  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  Token &Tok,
+                                                  bool CheckForEoD = true,
+                                                  bool Parenthesized = false);
+
+  /// Process a '__has_embed("path" [, ...])' expression.
+  ///
+  /// Returns predefined `__STDC_EMBED_*` macro values if
+  /// successful.
+  int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
 
   /// Process a '__has_include("path")' expression.
   ///
@@ -2679,6 +2732,15 @@ class Preprocessor {
       const FileEntry *LookupFromFile, StringRef &LookupFilename,
       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
+  // Binary data inclusion
+  void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+                            const FileEntry *LookupFromFile = nullptr);
+  void HandleEmbedDirectiveNaive(
+      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+      StringRef BinaryContents, const size_t TargetCharWidth);
+  void HandleEmbedDirectiveBuiltin(
+      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
+      StringRef BinaryContents, const size_t TargetCharWidth);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 058194bcde72e51..23f3458d79e0312 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -167,6 +167,13 @@ class PreprocessorOptions {
   /// of the specified memory buffer (the second part of each pair).
   std::vector<std::pair<std::string, llvm::MemoryBuffer *>> RemappedFileBuffers;
 
+  /// User specified embed entries.
+  std::vector<std::string> EmbedEntries;
+
+  /// Whether or not naive expansion should be used all the time for
+  /// builtin embed
+  bool NoBuiltinPPEmbed = false;
+
   /// Whether the compiler instance should retain (i.e., not free)
   /// the buffers associated with remapped files.
   ///
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index d16626b10652136..e0e80b5e0fbedbe 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -537,13 +537,19 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
 
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
-                              bool RequiresNullTerminator) {
+                              bool RequiresNullTerminator,
+                              std::optional<int64_t> MaybeLimit) {
   const FileEntry *Entry = &FE.getFileEntry();
   // If the content is living on the file entry, return a reference to it.
   if (Entry->Content)
     return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef());
 
   uint64_t FileSize = Entry->getSize();
+
+  if (MaybeLimit)
+    FileSize = *MaybeLimit;
+
+
   // If there's a high enough chance that the file have changed since we
   // got its size, force a stat before opening it.
   if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index e5599d545541085..d2b5426d27bb3b2 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -423,7 +423,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // case values).  Note that this depends on 'if' being null terminated.
 
 #define HASH(LEN, FIRST, THIRD) \
-  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
+  (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
 #define CASE(LEN, FIRST, THIRD, NAME) \
   case HASH(LEN, FIRST, THIRD): \
     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,6 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
+  CASE( 5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b91126ebed0186c..fc2f749a34fc471 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map});
+                   options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
@@ -8182,6 +8182,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
+  // Pass along any -embed-dir or similar options so we get proper embed paths.
+  Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group);
+
   // Determine the original source input.
   auto FindSource = [](const Action *S) -> const Action * {
     while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 606e9e790ad833b..232626e783e1b7d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -1008,6 +1008,7 @@ struct AdditionalKeywords {
     kw_synchronized = &IdentTable.get("synchronized");
     kw_throws = &IdentTable.get("throws");
     kw___except = &IdentTable.get("__except");
+    kw___has_embed = &IdentTable.get("__has_embed");
     kw___has_include = &IdentTable.get("__has_include");
     kw___has_include_next = &IdentTable.get("__has_include_next");
 
@@ -1305,6 +1306,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_NS_ERROR_ENUM;
   IdentifierInfo *kw_NS_OPTIONS;
   IdentifierInfo *kw___except;
+  IdentifierInfo *kw___has_embed;
   IdentifierInfo *kw___has_include;
   IdentifierInfo *kw___has_include_next;
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 543c119620bf28f..e405a9085951dc0 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1400,6 +1400,9 @@ class AnnotatingParser {
                        Keywords.kw___has_include_next)) {
         parseHasInclude();
       }
+      else if (Tok->is(Keywords.kw___has_embed)) {
+        parseHasEmbed();
+      }
       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
           Tok->Next->isNot(tok::l_paren)) {
         Tok->setType(TT_CSharpGenericTypeConstraint);
@@ -1464,6 +1467,21 @@ class AnnotatingParser {
     }
   }
 
+  void parseEmbedDirective() {
+    if (CurrentToken && CurrentToken->is(tok::less)) {
+      next();
+      while (CurrentToken) {
+        // Mark tokens up to the trailing line comments as implicit string
+        // literals.
+        if (CurrentToken->isNot(tok::comment) &&
+            !CurrentToken->TokenText.startswith("//")) {
+          CurrentToken->setType(TT_ImplicitStringLiteral);
+        }
+        next();
+      }
+    }
+  }
+
   void parseWarningOrError() {
     next();
     // We still want to format the whitespace left of the first token of the
@@ -1500,6 +1518,14 @@ class AnnotatingParser {
     next(); // ')'
   }
 
+  void parseHasEmbed() {
+    if (CurrentToken && CurrentToken->is(tok::l_paren)) {
+      next(); // '(' of `__has_embed(...)`
+      parseEmbedDirective(); // only acts on an angled `<...>` operand
+      next(); // ')'
+    }
+  }
+
   LineType parsePreprocessorDirective() {
     bool IsFirstToken = CurrentToken->IsFirst;
     LineType Type = LT_PreprocessorDirective;
@@ -1563,6 +1589,8 @@ class AnnotatingParser {
       } else if (Tok->isOneOf(Keywords.kw___has_include,
                               Keywords.kw___has_include_next)) {
         parseHasInclude();
+      } else if (Tok->is(Keywords.kw___has_embed)) {
+        parseHasEmbed();
       }
     }
     return Type;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bb442495f58359c..05406b5d42d7380 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4302,6 +4302,12 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   if (Opts.SourceDateEpoch)
     GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch));
 
+  for (const auto &EmbedEntry : Opts.EmbedEntries)
+    GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
+
+  if (Opts.NoBuiltinPPEmbed)
+    GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
 }
@@ -4394,6 +4400,19 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     }
   }
 
+  for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) {
+    StringRef Val = A->getValue();
+    Opts.EmbedEntries.push_back(std::string(Val));
+  }
+
+  // Can disable the internal embed builtin / token. Only the joined
+  // `-fno-builtin-<name>` form carries a value; plain `-fno-builtin` does not.
+  for (const auto *A : Args.filtered(OPT_fno_builtin_)) {
+    StringRef Val = A->getValue();
+    if (Val == "pp_embed")
+      Opts.NoBuiltinPPEmbed = true;
+  }
+
   // Always avoid lexing editor placeholders when we're just running the
   // preprocessor as we never want to emit the
   // "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index c2f6f41ae291efb..10558b1d34bf623 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,6 +65,21 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
+  /// Reports an #embed whose target could not be resolved as a missing
+  /// dependency. NOTE(review): resolved files are assumed to be recorded
+  /// elsewhere (the original comment says FileChanged) - confirm for #embed.
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+                      bool IsAngled, CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
+    if (File)
+      return;
+    DepCollector.maybeAddDependency(
+        FileName, /*FromModule*/ false, /*IsSystem*/ false,
+        /*IsModuleFile*/ false, &PP.getFileManager(), /*IsMissing*/ true);
+  }
+
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -81,6 +96,20 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
     // Files that actually exist are handled by FileChanged.
   }
 
+  /// Records the file found by a `__has_embed` query as a dependency.
+  void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
+                OptionalFileEntryRef File) override {
+    if (File) {
+      StringRef Resolved =
+          llvm::sys::path::remove_leading_dotslash(File->getName());
+      DepCollector.maybeAddDependency(Resolved, /*FromModule=*/false,
+                                      /*IsSystem=*/false,
+                                      /*IsModuleFile=*/false,
+                                      &PP.getFileManager(),
+                                      /*IsMissing=*/false);
+    }
+  }
+
   void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
                   OptionalFileEntryRef File,
                   SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 6aad04370f6e7ad..683f751a94244ec 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -26,6 +26,14 @@ namespace DOT = llvm::DOT;
 
 namespace {
 class DependencyGraphCallback : public PPCallbacks {
+public:
+  enum DirectiveBehavior {
+    Normal = 0,
+    IgnoreEmbed = 0b01,
+    IgnoreInclude = 0b10,
+  };
+
+private:
   const Preprocessor *PP;
   std::string OutputFile;
   std::string SysRoot;
@@ -34,6 +42,7 @@ class DependencyGraphCallback : public PPCallbacks {
       llvm::DenseMap<FileEntryRef, SmallVector<FileEntryRef, 2>>;
 
   DependencyMap Dependencies;
+  DirectiveBehavior Behavior;
 
 private:
   raw_ostream &writeNodeReference(raw_ostream &OS,
@@ -42,7 +51,11 @@ class DependencyGraphCallback : public PPCallbacks {
 
 public:
+  /// \param Action selects which directive kinds are left out of the graph;
+  /// it is stored in Behavior, which the directive callbacks test.
   DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
-                          StringRef SysRoot)
-    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+                          StringRef SysRoot,
+                          DirectiveBehavior Action = IgnoreEmbed)
+      : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()),
+        Behavior(Action) {}
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
@@ -52,6 +62,12 @@ class DependencyGraphCallback : public PPCallbacks {
                           StringRef RelativePath, const Module *Imported,
                           SrcMgr::CharacteristicKind FileType) override;
 
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
+
   void EndOfMainFile() override {
     OutputGraphFile();
   }
@@ -70,6 +86,31 @@ void DependencyGraphCallback::InclusionDirective(
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
     SrcMgr::CharacteristicKind FileType) {
+  if ((Behavior & IgnoreInclude) == IgnoreInclude) {
+    return;
+  }
+  if (!File)
+    return;
+
+  SourceManager &SM = PP->getSourceManager();
+  OptionalFileEntryRef FromFile =
+      SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc)));
+  if (!FromFile)
+    return;
+
+  Dependencies[*FromFile].push_back(*File);
+
+  AllFiles.insert(*File);
+  AllFiles.insert(*FromFile);
+}
+
+void DependencyGraphCallback::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  if ((Behavior & IgnoreEmbed) == IgnoreEmbed) {
+    return;
+  }
   if (!File)
     return;
 
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 846e5fce6de7b2c..b7d084773b0a195 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__STDC_UTF_16__", "1");
   Builder.defineMacro("__STDC_UTF_32__", "1");
 
+  // __has_embed definitions
+  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
+  Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
+  Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
 
@@ -729,6 +734,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
   if (LangOpts.Char8)
     Builder.defineMacro("__cpp_char8_t", "202207L");
   Builder.defineMacro("__cpp_impl_destroying_delete", "201806L");
+
+  Builder.defineMacro("__cpp_pp_embed", "202403L");
 }
 
 /// InitializeOpenCLFeatureTestMacros - Define OpenCL macros based on target
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 7f5f6690682300e..fb9baa92e6836d3 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool DisableLineMarkers;
   bool DumpDefines;
   bool DumpIncludeDirectives;
+  bool DumpEmbedDirectives;
   bool UseLineDirectives;
   bool IsFirstFileEntered;
   bool MinimizeWhitespace;
@@ -106,12 +107,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
 public:
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
-                           bool defines, bool DumpIncludeDirectives,
+                           bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
                            bool UseLineDirectives, bool MinimizeWhitespace,
                            bool DirectivesOnly, bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
+        DumpEmbedDirectives(DumpEmbedDirectives),
         UseLineDirectives(UseLineDirectives),
         MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
         KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
@@ -149,6 +151,11 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
                    FileID PrevFID) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -398,6 +405,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
   }
 }
 
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  // Echo the #embed directive only when embed-directive dumping is enabled.
+  if (!DumpEmbedDirectives)
+    return;
+  const char Open = IsAngled ? '<' : '"';
+  const char Close = IsAngled ? '>' : '"';
+  MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+  *OS << "#embed " << Open << FileName << Close << " /* clang -E -dE */";
+  setEmittedDirectiveOnThisLine();
+}
+
 void PrintPPOutputPPCallbacks::InclusionDirective(
     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -981,7 +1002,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
       Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 28f7b0b9edfc5c2..4a73946951fd9c2 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -71,6 +71,11 @@ class InclusionRewriter : public PPCallbacks {
                    FileID PrevFID) override;
   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -177,6 +182,14 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
   LastInclusionLocation = SourceLocation();
 }
 
+/// Callback for `#embed` directives. Intentionally a no-op: every argument
+/// is ignored and no rewriter state is touched.
+void InclusionRewriter::EmbedDirective(
+    SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/,
+    CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/,
+    OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/,
+    StringRef /*RelativePath*/) {}
+
 /// This should be called whenever the preprocessor encounters include
 /// directives. It does not say whether the file has been included, but it
 /// provides more information about the directive (hash location instead
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
index f2b60a728e90178..ea5dce2c27a587c 100644
--- a/clang/lib/Lex/PPCallbacks.cpp
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -14,16 +14,5 @@ using namespace clang;
 // Out of line key method.
 PPCallbacks::~PPCallbacks() = default;
 
-void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
-                             bool IsAngled, OptionalFileEntryRef File,
-                             SrcMgr::CharacteristicKind FileType) {}
-
 // Out of line key method.
 PPChainedCallbacks::~PPChainedCallbacks() = default;
-
-void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,
-                                    bool IsAngled, OptionalFileEntryRef File,
-                                    SrcMgr::CharacteristicKind FileType) {
-  First->HasInclude(Loc, FileName, IsAngled, File, FileType);
-  Second->HasInclude(Loc, FileName, IsAngled, File, FileType);
-}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e3065c17dc70b43..e0d98d7ca03fa11 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -18,7 +18,9 @@
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/FrontendOptions.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LexDiagnostic.h"
@@ -1079,6 +1081,101 @@ OptionalFileEntryRef Preprocessor::LookupFile(
   return std::nullopt;
 }
 
+/// Resolves the resource named by an `#embed` / `__has_embed` operand.
+///
+/// Absolute names are looked up directly and never searched for. A quoted
+/// name is first tried relative to \p SearchPath when one is supplied
+/// (otherwise relative to the directory of \p LookupFromFile) and then
+/// relative to the working directory. Finally, for quoted and angled names
+/// alike, each configured embed directory is tried in order.
+///
+/// \returns the first file found, or std::nullopt if every lookup failed.
+OptionalFileEntryRef Preprocessor::LookupEmbedFile(
+    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+    bool OpenFile, const FileEntry *LookupFromFile,
+    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath) {
+  FileManager &FM = this->getFileManager();
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // lookup path or immediately fail
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(Filename, true, OpenFile);
+    return llvm::expectedToOptional(std::move(ShouldBeEntry));
+  }
+
+  // Otherwise, it's search time!
+  SmallString<512> LookupPath;
+  if (!isAngled) {
+    // Local lookup. NOTE(review): a non-null SearchPath always wins, even if
+    // it is empty, so LookupFromFile is only consulted for a null SearchPath.
+    bool TryLocalLookup = false;
+    if (SearchPath) {
+      // use the provided search path as the local lookup path
+      llvm::sys::path::native(*SearchPath, LookupPath);
+      TryLocalLookup = true;
+    } else if (LookupFromFile) {
+      // Use file-based lookup here
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty()) {
+        llvm::sys::path::native(FullFileDir, LookupPath);
+        llvm::sys::path::remove_filename(LookupPath);
+        TryLocalLookup = true;
+      }
+    }
+    if (TryLocalLookup) {
+      if (!LookupPath.empty() &&
+          !llvm::sys::path::is_separator(LookupPath.back()))
+        LookupPath.append(llvm::sys::path::get_separator());
+      LookupPath.append(Filename);
+      llvm::Expected<FileEntryRef> ShouldBeEntry =
+          FM.getFileRef(LookupPath, true, OpenFile);
+      if (ShouldBeEntry)
+        return std::move(*ShouldBeEntry);
+      // A miss is not an error here: drop it and try the next location.
+      llvm::consumeError(ShouldBeEntry.takeError());
+    }
+
+    // Working directory lookup. The Expected is converted to an optional so
+    // that a failure to resolve "." is consumed rather than left unchecked.
+    LookupPath.clear();
+    if (auto WorkingDirEntry =
+            llvm::expectedToOptional(FM.getDirectoryRef("."))) {
+      StringRef WorkingDir = WorkingDirEntry->getName();
+      if (!WorkingDir.empty()) {
+        llvm::sys::path::native(WorkingDir, LookupPath);
+        // Add a separator only when one is missing; appending a second one
+        // unconditionally would produce paths such as ".//file".
+        if (!LookupPath.empty() &&
+            !llvm::sys::path::is_separator(LookupPath.back()))
+          LookupPath.append(llvm::sys::path::get_separator());
+        LookupPath.append(Filename);
+        llvm::Expected<FileEntryRef> ShouldBeEntry =
+            FM.getFileRef(LookupPath, true, OpenFile);
+        if (ShouldBeEntry)
+          return std::move(*ShouldBeEntry);
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  // Embed directories apply to quoted and angled names alike.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    llvm::sys::path::native(Entry, LookupPath);
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back()))
+      LookupPath.append(llvm::sys::path::get_separator());
+    LookupPath.append(Filename.begin(), Filename.end());
+    // Normalize separators that came from the spelled file name as well.
+    llvm::sys::path::native(LookupPath);
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(LookupPath, true, OpenFile);
+    if (ShouldBeEntry)
+      return std::move(*ShouldBeEntry);
+    llvm::consumeError(ShouldBeEntry.takeError());
+  }
+  return std::nullopt;
+}
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
@@ -1174,6 +1271,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp_include_next:
       case tok::pp___include_macros:
       case tok::pp_pragma:
+      case tok::pp_embed:
         Diag(Result, diag::err_embedded_directive) << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
@@ -1288,6 +1386,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleIdentSCCSDirective(Result);
     case tok::pp_sccs:
       return HandleIdentSCCSDirective(Result);
+    case tok::pp_embed:
+      return HandleEmbedDirective(SavedHash.getLocation(), Result,
+                                  getCurrentFileLexer()
+                                      ? getCurrentFileLexer()->getFileEntry()
+                                      : nullptr);
     case tok::pp_assert:
       //isExtension = true;  // FIXME: implement #assert
       break;
@@ -3517,3 +3620,400 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
 }
+
+// Bracket kinds tracked while skipping a balanced-token-sequence argument.
+enum class BracketType { Brace, Paren, Square };
+
+/// Lexes the parameter list that may follow the resource name of an `#embed`
+/// directive or `__has_embed` expression. `limit` and `clang::offset` take an
+/// #if-style constant expression; `if_empty`, `prefix` and `suffix` keep their
+/// argument as a raw token list. Any other parameter is counted in
+/// UnrecognizedParams and, when \p DiagnoseUnknown is set, warned about.
+/// Lexing stops at ')' if \p InHasEmbed is set, else at end-of-directive.
+/// \returns the parameters seen; Successful is set only when the end of the
+/// list was reached.
+Preprocessor::LexEmbedParametersResult
+Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
+                                 bool DiagnoseUnknown) {
+  LexEmbedParametersResult Result{};
+  SmallString<32> Parameter;
+  SmallVector<Token, 2> ParameterTokens;
+  tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
+  Result.StartLoc = CurTok.getLocation();
+  for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
+    Parameter.clear();
+    // Start every parameter with an empty argument: tokens collected for an
+    // unrecognized parameter must not leak into the next one.
+    ParameterTokens.clear();
+    // Lex identifier [:: identifier ...]
+    if (!CurTok.is(tok::identifier)) {
+      Diag(CurTok, diag::err_expected) << "identifier";
+      DiscardUntilEndOfDirective();
+      return Result;
+    }
+    Token ParameterStartTok = CurTok;
+    Parameter.append(CurTok.getIdentifierInfo()->getName());
+    for (LexNonComment(CurTok); CurTok.is(tok::coloncolon);
+         LexNonComment(CurTok)) {
+      Parameter.append("::");
+      LexNonComment(CurTok);
+      if (!CurTok.is(tok::identifier)) {
+        Diag(CurTok, diag::err_expected) << "identifier";
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      Parameter.append(CurTok.getIdentifierInfo()->getName());
+    }
+    // Lex the parameters (dependent on the parameter type we want!)
+    if (Parameter == "limit") {
+      // we have a limit parameter and its internals are processed using
+      // evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult LimitEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!LimitEvalResult.Value)
+        return Result;
+      const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
+      // Only negative or over-wide results are unusable; unsigned ones are fine.
+      const bool ValueDoesNotFit =
+          LimitResult.getBitWidth() > 64 || LimitResult.isNegative();
+      if (ValueDoesNotFit) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getRawData()[0]);
+      } else {
+        Result.MaybeLimitParam =
+            static_cast<size_t>(LimitResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else if (Parameter == "clang::offset") {
+      // we have an offset parameter and its internals are processed using
+      // evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult OffsetEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!OffsetEvalResult.Value)
+        return Result;
+      const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+      if (OffsetResult.getBitWidth() > 64) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getRawData()[0]);
+      } else {
+        Result.MaybeOffsetParam =
+            static_cast<size_t>(OffsetResult.getZExtValue());
+      }
+      LexNonComment(CurTok);
+    } else {
+      if (CurTok.is(tok::l_paren)) {
+        SmallVector<BracketType, 4> Brackets = {BracketType::Paren};
+        auto ParseArgToken = [&]() {
+          for (LexNonComment(CurTok); CurTok.isNot(tok::eod);
+               LexNonComment(CurTok)) {
+            switch (CurTok.getKind()) {
+            default:
+              break;
+            case tok::l_paren:
+              Brackets.push_back(BracketType::Paren);
+              break;
+            case tok::r_paren:
+              if (Brackets.back() != BracketType::Paren) {
+                Diag(CurTok, diag::err_pp_expected_rparen);
+                return false;
+              }
+              Brackets.pop_back();
+              if (Brackets.empty())
+                return true;
+              break;
+            case tok::l_brace:
+              Brackets.push_back(BracketType::Brace);
+              break;
+            case tok::r_brace:
+              if (Brackets.back() != BracketType::Brace) {
+                Diag(CurTok, diag::err_expected) << "}";
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            case tok::l_square:
+              Brackets.push_back(BracketType::Square);
+              break;
+            case tok::r_square:
+              if (Brackets.back() != BracketType::Square) {
+                Diag(CurTok, diag::err_expected) << "]";
+                return false;
+              }
+              Brackets.pop_back();
+              break;
+            }
+            ParameterTokens.push_back(CurTok);
+          }
+          if (!Brackets.empty()) {
+            Diag(CurTok, diag::err_pp_expected_rparen);
+            DiscardUntilEndOfDirective();
+            return false;
+          }
+          return true;
+        };
+        if (!ParseArgToken())
+          return Result;
+        if (!CurTok.is(tok::r_paren)) {
+          Diag(CurTok, diag::err_pp_expected_rparen);
+          DiscardUntilEndOfDirective();
+          return Result;
+        }
+        Lex(CurTok);
+      }
+      // "Token-soup" parameters
+      // TODO: integer list optimization
+      if (Parameter == "if_empty") {
+        Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+      } else if (Parameter == "prefix") {
+        Result.MaybePrefixParam = std::move(ParameterTokens);
+      } else if (Parameter == "suffix") {
+        Result.MaybeSuffixParam = std::move(ParameterTokens);
+      } else {
+        ++Result.UnrecognizedParams;
+        if (DiagnoseUnknown)
+          Diag(ParameterStartTok, diag::warn_pp_unknown_parameter_ignored)
+              << 1 << Parameter;
+      }
+    }
+  }
+  Result.Successful = true;
+  return Result;
+}
+
+// Byte spellings; #embed tokens point here, so storage must be static.
+static constexpr const char *IntegerLiterals[] = {
+    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "10",
+    "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",  "20",  "21",
+    "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",  "30",  "31",  "32",
+    "33",  "34",  "35",  "36",  "37",  "38",  "39",  "40",  "41",  "42",  "43",
+    "44",  "45",  "46",  "47",  "48",  "49",  "50",  "51",  "52",  "53",  "54",
+    "55",  "56",  "57",  "58",  "59",  "60",  "61",  "62",  "63",  "64",  "65",
+    "66",  "67",  "68",  "69",  "70",  "71",  "72",  "73",  "74",  "75",  "76",
+    "77",  "78",  "79",  "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",
+    "88",  "89",  "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",
+    "99",  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
+    "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120",
+    "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131",
+    "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142",
+    "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153",
+    "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164",
+    "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175",
+    "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186",
+    "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197",
+    "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208",
+    "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
+    "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230",
+    "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241",
+    "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
+    "253", "254", "255"};
+
+/// Expands an `#embed` into a plain token stream: the optional prefix tokens,
+/// one decimal numeric_constant per byte of \p BinaryContents separated by
+/// commas, then the optional suffix tokens. When \p BinaryContents is empty
+/// only the `if_empty` tokens, if any, are entered.
+void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
+                                              LexEmbedParametersResult &Params,
+                                              StringRef BinaryContents,
+                                              const size_t TargetCharWidth) {
+  (void)TargetCharWidth; // for later, when we support various sizes
+
+  // Size the buffer up front: either the if_empty replacement, or prefix +
+  // data + suffix, where N bytes need N literals and N - 1 commas.
+  size_t NumTokens = 0;
+  if (BinaryContents.empty()) {
+    if (Params.MaybeIfEmptyParam)
+      NumTokens = Params.MaybeIfEmptyParam->size();
+  } else {
+    if (Params.MaybePrefixParam)
+      NumTokens += Params.MaybePrefixParam->size();
+    NumTokens += BinaryContents.size() * 2 - 1;
+    if (Params.MaybeSuffixParam)
+      NumTokens += Params.MaybeSuffixParam->size();
+  }
+  // The trailing () value-initializes every Token in the array.
+  std::unique_ptr<Token[]> Tokens(new Token[NumTokens]());
+  // Next slot to fill; must end up exactly one past the last token.
+  Token *Cursor = Tokens.get();
+
+  if (BinaryContents.empty()) {
+    // Nothing to embed and no replacement requested: emit no tokens.
+    if (!Params.MaybeIfEmptyParam)
+      return;
+    Cursor = std::copy(Params.MaybeIfEmptyParam->begin(),
+                       Params.MaybeIfEmptyParam->end(), Cursor);
+    assert(Cursor == Tokens.get() + NumTokens);
+    EnterTokenStream(std::move(Tokens), NumTokens, true, true);
+    return;
+  }
+
+  // FIXME: this does not take the target's byte size into account;
+  // will fail on many DSPs and embedded machines!
+  if (Params.MaybePrefixParam)
+    Cursor = std::copy(Params.MaybePrefixParam->begin(),
+                       Params.MaybePrefixParam->end(), Cursor);
+
+  const size_t LastByte = BinaryContents.size() - 1;
+  for (size_t I = 0; I <= LastByte; ++I) {
+    const unsigned char ByteValue = BinaryContents[I];
+    // The token's literal data points into the IntegerLiterals table.
+    StringRef Spelling = IntegerLiterals[ByteValue];
+    Cursor->setKind(tok::numeric_constant);
+    Cursor->setLiteralData(Spelling.data());
+    Cursor->setLength(Spelling.size());
+    Cursor->setLocation(FilenameLoc);
+    ++Cursor;
+    // Bytes are comma-separated; nothing follows the last one.
+    if (I == LastByte)
+      continue;
+    Cursor->setKind(tok::comma);
+    Cursor->setLocation(FilenameLoc);
+    ++Cursor;
+  }
+  if (Params.MaybeSuffixParam)
+    Cursor = std::copy(Params.MaybeSuffixParam->begin(),
+                       Params.MaybeSuffixParam->end(), Cursor);
+  assert(Cursor == Tokens.get() + NumTokens);
+  EnterTokenStream(std::move(Tokens), NumTokens, true, false);
+}
+
+void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
+                                               LexEmbedParametersResult &Params,
+                                               StringRef BinaryContents,
+                                               const size_t TargetCharWidth) {
+  // TODO: direct built-in support; until then forward to the naive path.
+  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                             TargetCharWidth);
+}
+
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // Entry point for '#embed': find the resource, read it, and expand it.
+  // Warn unless the active mode is C23 (for C) or C++26 (for C++).
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23)
+    Diag(EmbedTok, LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                      : diag::warn_c23_pp_embed);
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  LexEmbedParametersResult Params = LexEmbedParameters(
+      CurTok, /*InHasEmbed=*/false, /*DiagnoseUnknown=*/true);
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  SmallString<512> SearchPath;
+  SmallString<512> RelativePath;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // GetIncludeFilenameSpelling diagnoses a bad spelling and empties Filename.
+  if (Filename.empty())
+    return;
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, &SearchPath, &RelativePath);
+  if (!MaybeFileRef) {
+    // could not find file
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
+      return;
+    }
+    Diag(FilenameTok, diag::err_pp_file_not_found)
+        << Filename;
+    return;
+  }
+  // Only forward limit() when it fits the signed type the file reader takes;
+  // a larger limit is treated as no limit at all.
+  std::optional<int64_t> MaybeSignedLimit{};
+  if (Params.MaybeLimitParam &&
+      static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam)
+    MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile = getFileManager().getBufferForFile(
+      *MaybeFileRef, false, false, MaybeSignedLimit);
+  if (!MaybeFile) {
+    // the file was found, but a buffer over its contents could not be made
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile.get()->getBuffer();
+  if (Params.MaybeOffsetParam) {
+    // offsets all the way to the end of the file make for an empty file.
+    const size_t OffsetParam = *Params.MaybeOffsetParam;
+    BinaryContents = BinaryContents.substr(OffsetParam);
+  }
+  const size_t TargetCharWidth = getTargetInfo().getCharWidth();
+  if (TargetCharWidth > 64) {
+    // Too wide for us to handle
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "CHAR_BIT is too wide for the target architecture to handle "
+           "properly";
+    return;
+  }
+  if (TargetCharWidth != 8) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "At the moment, we do not have the machinery to support non 8-bit "
+           "CHAR_BIT targets!";
+    return;
+  }
+  if (CHAR_BIT % TargetCharWidth != 0) {
+    Diag(EmbedTok, diag::err_pp_unsupported_directive)
+        << 1
+        << "CHAR_BIT is not evenly divisible by host architecture's byte "
+           "definition";
+    return;
+  }
+  if (Callbacks) {
+    CharSourceRange FilenameSourceRange(
+        SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true);
+    CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc),
+                                    true);
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange,
+                              ParametersRange, MaybeFileRef, SearchPath,
+                              RelativePath);
+  }
+  if (PPOpts->NoBuiltinPPEmbed) {
+    HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                              TargetCharWidth);
+  } else {
+    // emit a token directly, handle it internally.
+    HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+                                TargetCharWidth);
+  }
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 269984aae07bf28..dda5717afc699da 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -868,7 +868,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
 /// may occur after a #if or #elif directive.  If the expression is equivalent
 /// to "!defined(X)" return X in IfNDefMacro.
 Preprocessor::DirectiveEvalResult
-Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                          Token &Tok, bool CheckForEoD,
+                                          bool Parenthesized) {
   SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -880,7 +882,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
   DisableMacroExpansion = false;
 
   // Peek ahead one token.
-  Token Tok;
   LexNonComment(Tok);
 
   // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
@@ -901,7 +902,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
     // We cannot trust the source range from the value because there was a
     // parse error. Track the range manually -- the end of the directive is the
     // end of the condition range.
-    return {false,
+    return {std::nullopt,
+            false,
             DT.IncludedUndefinedIds,
             {ExprStartLoc, ConditionRange.getEnd()}};
   }
@@ -917,7 +919,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+    const bool IsNonZero = ResVal.Val != 0;
+    const SourceRange ValRange = ResVal.getRange();
+    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+            ValRange};
   }
 
   // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
@@ -930,17 +935,34 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+    const bool IsNonZero = ResVal.Val != 0;
+    const SourceRange ValRange = ResVal.getRange();
+    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+            ValRange};
   }
 
-  // If we aren't at the tok::eod token, something bad happened, like an extra
-  // ')' token.
-  if (Tok.isNot(tok::eod)) {
-    Diag(Tok, diag::err_pp_expected_eol);
-    DiscardUntilEndOfDirective();
+  if (CheckForEoD) {
+    // If we aren't at the tok::eod token, something bad happened, like an extra
+    // ')' token.
+    if (Tok.isNot(tok::eod)) {
+      Diag(Tok, diag::err_pp_expected_eol);
+      DiscardUntilEndOfDirective();
+    }
   }
 
   // Restore 'DisableMacroExpansion'.
   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-  return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+  const bool IsNonZero = ResVal.Val != 0;
+  const SourceRange ValRange = ResVal.getRange();
+  return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// EvaluateDirectiveExpression - Convenience overload that evaluates the
+/// directive expression using a local scratch token, so the last token lexed
+/// is not reported back to the caller. Other arguments are forwarded as-is.
+Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(
+    IdentifierInfo *&IfNDefMacro, bool CheckForEoD, bool Parenthesized) {
+  Token Tok;
+  return EvaluateDirectiveExpression(IfNDefMacro, Tok, CheckForEoD,
+                                     Parenthesized);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b371f8cf7a9c072..6e0163ccc89b7fb 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
     Ident__has_c_attribute = nullptr;
 
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+  Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
   Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning      = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1264,6 +1265,114 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
   return File.has_value();
 }
 
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+/// Returns one of the VALUE__STDC_EMBED_{NOT_FOUND,FOUND,EMPTY}__ constants.
+int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // Warn unless the active mode is C23 (for C) or C++26 (for C++).
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23) {
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+                                          : diag::warn_c23_pp_has_embed);
+    Diag(Tok, EitherDiag);
+  }
+
+  // Save the location of the current token.  If a '(' is later found, use
+  // that location.  If not, use the end of this location instead.
+  SourceLocation LParenLoc = Tok.getLocation();
+
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(LParenLoc, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+    assert(Tok.is(tok::identifier));
+    Tok.setIdentifierInfo(II);
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  // Get '('. If we don't have a '(', try to form a header-name token.
+  do {
+    if (this->LexHeaderName(Tok)) {
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+    }
+  } while (Tok.getKind() == tok::comment);
+
+  // Ensure we have a '('.
+  if (Tok.isNot(tok::l_paren)) {
+    // No '(', use end of last token.
+    LParenLoc = this->getLocForEndOfToken(LParenLoc);
+    this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
+    // If the next token looks like a filename or the start of one,
+    // assume it is and process it as such.
+    if (Tok.isNot(tok::header_name)) {
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+    }
+  } else {
+    // Save '(' location for possible missing ')' message.
+    LParenLoc = Tok.getLocation();
+    if (this->LexHeaderName(Tok)) {
+      return VALUE__STDC_EMBED_NOT_FOUND__;
+    }
+  }
+
+  if (Tok.isNot(tok::header_name)) {
+    Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  SourceLocation FilenameLoc = Tok.getLocation();
+  Token FilenameTok = Tok;
+
+  Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
+  if (!Params.Successful) {
+    if (Tok.isNot(tok::eod))
+      this->DiscardUntilEndOfDirective();
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+  if (Params.UnrecognizedParams > 0) {
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+  if (!Tok.is(tok::r_paren)) {
+    Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+        << II << tok::r_paren;
+    Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+    DiscardUntilEndOfDirective();
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+
+
+  SmallString<128> FilenameBuffer;
+  SmallString<256> RelativePath;
+  StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
+  bool isAngled =
+      this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // GetIncludeFilenameSpelling diagnoses a bad spelling and empties Filename;
+  // report the resource as missing rather than looking up an empty name.
+  if (Filename.empty())
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  const FileEntry *LookupFromFile =
+      this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
+                               : nullptr;
+  OptionalFileEntryRef MaybeFileEntry =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, nullptr,
+                            &RelativePath);
+  if (Callbacks) {
+    Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+  }
+  if (!MaybeFileEntry) {
+    return VALUE__STDC_EMBED_NOT_FOUND__;
+  }
+  // limit() shrinks the resource first and clang::offset() then skips into
+  // what is left, matching the order used when expanding #embed itself.
+  size_t AvailableSize = MaybeFileEntry->getSize();
+  if (Params.MaybeLimitParam && *Params.MaybeLimitParam < AvailableSize)
+    AvailableSize = static_cast<size_t>(*Params.MaybeLimitParam);
+  if (AvailableSize == 0 ||
+      (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= AvailableSize))
+    return VALUE__STDC_EMBED_EMPTY__;
+  return VALUE__STDC_EMBED_FOUND__;
+}
+
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
   return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
 }
@@ -1801,6 +1910,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
       return;
     OS << (int)Value;
     Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_embed) {
+    // The argument to these two builtins should be a parenthesized
+    // file name string literal using angle brackets (<>) or
+    // double-quotes (""), optionally followed by a series of
+    // arguments similar to form like attributes.
+    int Value = EvaluateHasEmbed(Tok, II);
+
+    if (Tok.isNot(tok::r_paren))
+      return;
+    OS << Value;
+    Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.
     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 000000000000000..93d177a48c83ab8
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk
\ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 000000000000000..1ce9ab967e4a154
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+           __  _
+       .-.'  `; `-._  __  _
+      (_,         .-:'  `; `-._
+    ,'o"(        (_,           )
+   (__,-'      ,'o"(            )>
+      (       (__,-'            )
+       `-'._.--._(             )
+          |||  |||`-'._.--._.-'
+                     |||  |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 000000000000000..63d8dbd40c23542
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b
\ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 000000000000000..80980e753614a5d
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed("media/art.txt")
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk")
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1))
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1))
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD"))
+#error 6
+#elif !__has_embed(__FILE__ limit(2) prefix(y))
+#error 7
+#elif !__has_embed(__FILE__ limit(2))
+#error 8
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x))
+#error 9
+#elif __has_embed(<media/empty>) != 2
+#error 10
+#elif __has_embed(<media/empty> limit(0)) != 2
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != 2
+#error 12
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(1)) != 2
+#error 13
+#elif !__has_embed(<media/art.txt>)
+#error 14
+#elif !__has_embed(<media/art.txt> if_empty(meow))
+#error 15
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 000000000000000..fe0edb00e609837
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#if !__has_embed(__FILE__)
+#error 1
+#elif !__has_embed(__FILE__)
+#error 2
+#elif !__has_embed(__FILE__ suffix(x))
+#error 3
+#elif !__has_embed(__FILE__ suffix(x) limit(1))
+#error 4
+#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1))
+#error 5
+#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1))
+#error 6
+#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1))
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 000000000000000..46787041ca23bec
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+
+#if defined(__cplusplus)
+#if !defined(__cpp_pp_embed) || __cpp_pp_embed != 202403L
+#error 1
+#endif
+#endif
+
+#if !defined(__has_embed)
+#error 2
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c
new file mode 100644
index 000000000000000..337fa4ac067ec71
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed <nfejfNejAKFe>
+// expected-error at -1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 000000000000000..cd517b7f216ac32
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+typedef struct kitty {
+	int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+	int here;
+	kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+_Static_assert(meow == 'b', "");
+_Static_assert(kit.purr == 'b', "");
+_Static_assert(kit_kit.here == 'j', "");
+_Static_assert(kit_kit.kit.purr == 'k', "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 000000000000000..ac1a768b27ffff9
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+_Static_assert(sizeof(data) == 3, "");
+_Static_assert(123 == data[0], "");
+_Static_assert(124 == data[1], "");
+_Static_assert(125 == data[2], "");
+_Static_assert(sizeof(non_empty_data) == 2, "");
+_Static_assert('j' == non_empty_data[0], "");
+_Static_assert('k' == non_empty_data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 000000000000000..28a94fe9430f033
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> limit(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(offset_data) == 1, "");
+_Static_assert('j' == offset_data[0], "");
+_Static_assert(offset_data[0] == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 000000000000000..71a029544dca556
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('j' == data[0], "");
+_Static_assert('k' == data[1], "");
+_Static_assert(sizeof(offset_data) == 1, "");
+_Static_assert('k' == offset_data[0], "");
+_Static_assert(offset_data[0] == data[1], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 000000000000000..5182a2b874d3991
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('\xA' == data[0], "");
+_Static_assert('b' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 000000000000000..11c3f2bbbfb2bb6
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+_Static_assert(sizeof(data) == 2, "");
+_Static_assert('b' == data[0], "");
+_Static_assert('\xA' == data[1], "");
+_Static_assert(sizeof(empty_data) == 1, "");
+_Static_assert(1 == empty_data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 000000000000000..1f043ccd2ff54bf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+
+#embed __FILE__ unrecognized
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized' ignored}}
+#embed __FILE__ unrecognized::param
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-warning at -1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}}
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 000000000000000..5c33871c0c8a4d8
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('b' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 000000000000000..791cd9176ebe0ab
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+_Static_assert(sizeof(data) == 1, "");
+_Static_assert('a' == data[0], "");
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 000000000000000..2e65efe2a145dda
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
\ No newline at end of file
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 103c08ffbe83b38..8f9d7c77ccd150f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -777,6 +777,13 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
     "Semicolon-separated list of components to include in libLLVM, or \"all\".")
 endif()
 
+option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON)
+# Quiet down MSVC-style secure CRT warnings
+if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
+  add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
+endif()
+
+
 if(MSVC)
   option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
   # Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 1be13bc01ab9b25..828227f2f25a2f0 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
 # Invokes config.guess
 
 function( get_host_triple var )
-  if( MSVC )
+  if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
     if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
       set( value "aarch64-pc-windows-msvc" )
     elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
     else()
       set( value "powerpc-ibm-aix" )
     endif()
-  else( MSVC )
+  else()
     if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
       message(WARNING "unable to determine host target triple")
     else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
       endif( NOT TT_RV EQUAL 0 )
       set( value ${TT_OUT} )
     endif()
-  endif( MSVC )
+  endif()
   set( ${var} ${value} PARENT_SCOPE )
 endfunction( get_host_triple var )

>From 6a7a4c959f1635f5c3549010d277b5834a3e3fe2 Mon Sep 17 00:00:00 2001
From: ThePhD <phdofthehouse at gmail.com>
Date: Sun, 8 Oct 2023 17:43:51 -0400
Subject: [PATCH 02/23] =?UTF-8?q?=E2=9C=A8=20Speedy=20#embed=20implementat?=
 =?UTF-8?q?ion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

⚡ [Lex] Better reservations for improved performance/memory usage.

🛠 [Lex, Frontend] Remove comma hardcoding since we are servicing a full file

apply suggestions from git-clang-format
---
 clang/include/clang/AST/Expr.h                |  51 ++
 clang/include/clang/AST/RecursiveASTVisitor.h |   1 +
 .../clang/Basic/DiagnosticCommonKinds.td      |   6 +
 clang/include/clang/Basic/FileManager.h       |   5 +-
 clang/include/clang/Basic/StmtNodes.td        |   1 +
 clang/include/clang/Basic/TokenKinds.def      |   6 +-
 .../Frontend/PreprocessorOutputOptions.h      |   3 +-
 .../include/clang/Lex/PPDirectiveParameter.h  |  32 ++
 clang/include/clang/Lex/PPEmbedParameters.h   |  78 ++++
 clang/include/clang/Lex/Preprocessor.h        |  42 +-
 clang/include/clang/Sema/Sema.h               |  37 ++
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/AST/Expr.cpp                        |  16 +
 clang/lib/AST/ExprClassification.cpp          |   5 +
 clang/lib/AST/ExprConstant.cpp                |   8 +
 clang/lib/AST/ItaniumMangle.cpp               |   1 +
 clang/lib/AST/StmtPrinter.cpp                 |   7 +
 clang/lib/AST/StmtProfile.cpp                 |   2 +
 clang/lib/Basic/FileManager.cpp               |   1 -
 clang/lib/Basic/IdentifierTable.cpp           |   6 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   3 +-
 clang/lib/Format/TokenAnnotator.cpp           |   3 +-
 clang/lib/Frontend/DependencyFile.cpp         |  15 +-
 clang/lib/Frontend/DependencyGraph.cpp        |   2 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  14 +-
 clang/lib/Interpreter/Interpreter.cpp         |   1 +
 clang/lib/Lex/Lexer.cpp                       |   8 +
 clang/lib/Lex/PPDirectives.cpp                | 434 ++++++++++++++----
 clang/lib/Lex/PPMacroExpansion.cpp            |  23 +-
 clang/lib/Lex/Preprocessor.cpp                |   6 +-
 clang/lib/Parse/ParseExpr.cpp                 | 104 +++++
 clang/lib/Parse/ParseTemplate.cpp             |   2 +
 clang/lib/Sema/SemaDecl.cpp                   |  48 ++
 clang/lib/Sema/SemaDeclCXX.cpp                |   3 +-
 clang/lib/Sema/SemaExceptionSpec.cpp          |   1 +
 clang/lib/Sema/SemaExpr.cpp                   | 239 +++++++++-
 clang/lib/Sema/SemaTemplate.cpp               |  56 +++
 clang/lib/Sema/TreeTransform.h                |   6 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |  13 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |  10 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |   4 +
 clang/test/Preprocessor/embed_art.c           | 106 +++++
 clang/test/Preprocessor/embed_single_entity.c |   7 +
 clang/test/Preprocessor/embed_weird.cpp       |  68 +++
 llvm/include/llvm/Support/Base64.h            |  36 +-
 45 files changed, 1351 insertions(+), 172 deletions(-)
 create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h
 create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h
 create mode 100644 clang/test/Preprocessor/embed_art.c
 create mode 100644 clang/test/Preprocessor/embed_single_entity.c
 create mode 100644 clang/test/Preprocessor/embed_weird.cpp

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index b69c616b0090365..d3fba205c91c934 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4805,6 +4805,57 @@ class SourceLocExpr final : public Expr {
   friend class ASTStmtReader;
 };
 
+/// Represents a function call to __builtin_pp_embed().
+class PPEmbedExpr final : public Expr {
+  SourceLocation BuiltinLoc, RParenLoc;
+  DeclContext *ParentContext = nullptr;
+  StringLiteral *Filename = nullptr;
+  StringLiteral *BinaryData = nullptr;
+
+public:
+  enum Action {
+    NotFound,
+    FoundOne,
+    Expanded,
+  };
+
+  PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename,
+              StringLiteral *BinaryData, SourceLocation BLoc,
+              SourceLocation RParenLoc, DeclContext *Context);
+
+  /// Build an empty call expression.
+  explicit PPEmbedExpr(EmptyShell Empty) : Expr(PPEmbedExprClass, Empty) {}
+
+  /// Returns the declaration context stored for this expression; it is null
+  /// for a node built from an EmptyShell until a reader fills it in.
+  const DeclContext *getParentContext() const { return ParentContext; }
+  DeclContext *getParentContext() { return ParentContext; }
+
+  SourceLocation getLocation() const { return BuiltinLoc; }
+  SourceLocation getBeginLoc() const { return BuiltinLoc; }
+  SourceLocation getEndLoc() const { return RParenLoc; }
+
+  StringLiteral *getFilenameStringLiteral() const { return Filename; }
+  StringLiteral *getDataStringLiteral() const { return BinaryData; }
+
+  size_t getDataElementCount(ASTContext &Context) const;
+
+  child_range children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+
+  const_child_range children() const {
+    return const_child_range(child_iterator(), child_iterator());
+  }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == PPEmbedExprClass;
+  }
+
+private:
+  friend class ASTStmtReader;
+};
+
 /// Describes an C or C++ initializer list.
 ///
 /// InitListExpr describes an initializer list, which can be used to
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3dd23eb38eeabfc..6b7211bb0a0d3f1 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2809,6 +2809,7 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
 DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
 DEF_TRAVERSE_STMT(StmtExpr, {})
 DEF_TRAVERSE_STMT(SourceLocExpr, {})
+DEF_TRAVERSE_STMT(PPEmbedExpr, {})
 
 DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
   TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index f2df283c74829f6..4df86e35eebde38 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -59,6 +59,9 @@ def err_expected_string_literal : Error<"expected string literal "
           "'external_source_symbol' attribute|"
           "as argument of '%1' attribute}0">;
 
+def err_builtin_pp_embed_invalid_argument : Error<
+  "invalid argument to '__builtin_pp_embed': %0">;
+
 def err_invalid_string_udl : Error<
   "string literal with user-defined suffix cannot be used here">;
 def err_invalid_character_udl : Error<
@@ -80,6 +83,9 @@ def err_expected : Error<"expected %0">;
 def err_expected_either : Error<"expected %0 or %1">;
 def err_expected_after : Error<"expected %1 after %0">;
 
+def err_builtin_pp_embed_invalid_location : Error<
+  "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">;
+
 def err_param_redefinition : Error<"redefinition of parameter %0">;
 def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">;
 def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">,
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index c757f8775b425e9..cbfcb292778e5f7 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -282,8 +282,9 @@ class FileManager : public RefCountedBase<FileManager> {
   getBufferForFile(StringRef Filename, bool isVolatile = false,
                    bool RequiresNullTerminator = true,
                    std::optional<int64_t> MaybeLimit = std::nullopt) {
-    return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile,
-                                RequiresNullTerminator);
+    return getBufferForFileImpl(Filename,
+                                /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1),
+                                isVolatile, RequiresNullTerminator);
   }
 
 private:
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index cec301dfca2817b..e3be997dd1c86e0 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -203,6 +203,7 @@ def OpaqueValueExpr : StmtNode<Expr>;
 def TypoExpr : StmtNode<Expr>;
 def RecoveryExpr : StmtNode<Expr>;
 def BuiltinBitCastExpr : StmtNode<ExplicitCastExpr>;
+def PPEmbedExpr : StmtNode<Expr>;
 
 // Microsoft Extensions.
 def MSPropertyRefExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 19a66fbb0731194..167bd614efe7bd9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -154,10 +154,6 @@ TOK(eod)                 // End of preprocessing directive (end of line inside a
                          // directive).
 TOK(code_completion)     // Code completion marker
 
-// #embed speed support
-TOK(builtin_embed)
-
-
 // C99 6.4.9: Comments.
 TOK(comment)             // Comment (only in -E -C[C] mode)
 
@@ -758,6 +754,7 @@ ALIAS("__char32_t"   , char32_t          , KEYCXX)
 KEYWORD(__builtin_bit_cast               , KEYALL)
 KEYWORD(__builtin_available              , KEYALL)
 KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
+KEYWORD(__builtin_pp_embed               , KEYALL)
 
 // Keywords defined by Attr.td.
 #ifndef KEYWORD_ATTRIBUTE
@@ -993,6 +990,7 @@ ANNOTATION(repl_input_end)
 #undef CXX11_KEYWORD
 #undef KEYWORD
 #undef PUNCTUATOR
+#undef BUILTINOK
 #undef TOK
 #undef C99_KEYWORD
 #undef C23_KEYWORD
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 3e36db3f8ce46ea..0bc32c65a58d2d8 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -22,7 +22,8 @@ class PreprocessorOutputOptions {
   unsigned ShowMacroComments : 1;  ///< Show comments, even in macros.
   unsigned ShowMacros : 1;         ///< Print macro definitions.
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
-  unsigned ShowEmbedDirectives : 1;  ///< Print embeds, etc. within preprocessed output.
+  unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed
+                                    ///< output.
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
   unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h
new file mode 100644
index 000000000000000..fc413c345adc539
--- /dev/null
+++ b/clang/include/clang/Lex/PPDirectiveParameter.h
@@ -0,0 +1,32 @@
+//===--- PPDirectiveParameter.h - Directive parameter info ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PPDirectiveParameter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+
+/// Captures basic information about a preprocessor directive parameter.
+class PPDirectiveParameter {
+public:
+  SourceLocation Start; ///< Location where the parameter begins.
+  SourceLocation End;   ///< Location where the parameter ends.
+
+  PPDirectiveParameter(SourceLocation Start, SourceLocation End)
+      : Start(Start), End(End) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
new file mode 100644
index 000000000000000..7b76d2d573c23bd
--- /dev/null
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -0,0 +1,78 @@
+//===--- PPEmbedParameters.h - #embed directive parameters ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the classes describing #embed directive parameters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+
+#include "clang/Lex/PPDirectiveParameter.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+/// Preprocessor extension embed parameter "clang::offset"
+/// `clang::offset( constant-expression )`
+class PPEmbedParameterOffset : public PPDirectiveParameter {
+public:
+  size_t Offset; ///< Value given to `clang::offset(...)`.
+
+  PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
+                         SourceLocation End)
+      : PPDirectiveParameter(Start, End), Offset(Offset) {} // Base first.
+};
+
+/// Preprocessor standard embed parameter "limit"
+/// `limit( constant-expression )`
+class PPEmbedParameterLimit : public PPDirectiveParameter {
+public:
+  size_t Limit; ///< Value given to `limit(...)`.
+
+  PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
+      : PPDirectiveParameter(Start, End), Limit(Limit) {} // Base first.
+};
+
+/// Preprocessor standard embed parameter "prefix"
+/// `prefix( balanced-token-seq )`
+class PPEmbedParameterPrefix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to `prefix(...)`.
+
+  PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                         SourceLocation End) // Base is initialized first.
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "suffix"
+/// `suffix( balanced-token-seq )`
+class PPEmbedParameterSuffix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to `suffix(...)`.
+
+  PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                         SourceLocation End) // Base is initialized first.
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "if_empty"
+/// `if_empty( balanced-token-seq )`
+class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens; ///< Tokens given to `if_empty(...)`.
+
+  PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+                          SourceLocation End) // Base is initialized first.
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 7470bf5882730cb..58012fb79559e22 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,6 +29,7 @@
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PPEmbedParameters.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/APSInt.h"
@@ -1165,6 +1166,9 @@ class Preprocessor {
 
   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
 
+  /// Buffers for used #embed directives
+  std::vector<std::string> EmbedBuffers;
+
 public:
   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                DiagnosticsEngine &diags, const LangOptions &LangOpts,
@@ -1735,15 +1739,15 @@ class Preprocessor {
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
   struct LexEmbedParametersResult {
-    bool Successful;
-    std::optional<size_t> MaybeLimitParam;
-    std::optional<size_t> MaybeOffsetParam;
-    std::optional<SmallVector<Token, 2>> MaybeIfEmptyParam;
-    std::optional<SmallVector<Token, 2>> MaybePrefixParam;
-    std::optional<SmallVector<Token, 2>> MaybeSuffixParam;
-    int UnrecognizedParams;
+    std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+    std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+    std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+    std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+    std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
     SourceLocation StartLoc;
     SourceLocation EndLoc;
+    int UnrecognizedParams;
+    bool Successful;
   };
 
   LexEmbedParametersResult LexEmbedParameters(Token &Current,
@@ -1812,7 +1816,8 @@ class Preprocessor {
   /// Parses a simple integer literal to get its numeric value.  Floating
   /// point literals and user defined literals are rejected.  Used primarily to
   /// handle pragmas that accept integer arguments.
-  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
+  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+                                 bool WithLex = true);
 
   /// Disables macro expansion everywhere except for preprocessor directives.
   void SetMacroExpansionOnlyInDirectives() {
@@ -2441,8 +2446,7 @@ class Preprocessor {
   /// reference is for system \#include's or not (i.e. using <> instead of "").
   OptionalFileEntryRef
   LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
-                  bool OpenFile,
-                  const FileEntry *LookupFromFile = nullptr,
+                  bool OpenFile, const FileEntry *LookupFromFile = nullptr,
                   SmallVectorImpl<char> *SearchPath = nullptr,
                   SmallVectorImpl<char> *RelativePath = nullptr);
 
@@ -2735,12 +2739,18 @@ class Preprocessor {
   // Binary data inclusion
   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                             const FileEntry *LookupFromFile = nullptr);
-  void HandleEmbedDirectiveNaive(
-      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
-      StringRef BinaryContents, const size_t TargetCharWidth);
-  void HandleEmbedDirectiveBuiltin(
-      SourceLocation FilenameTok, LexEmbedParametersResult &Params,
-      StringRef BinaryContents, const size_t TargetCharWidth);
+  void HandleEmbedDirectiveNaive(SourceLocation HashLoc,
+                                 SourceLocation FilenameTok,
+                                 const LexEmbedParametersResult &Params,
+                                 StringRef BinaryContents,
+                                 const size_t TargetCharWidth);
+  void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc,
+                                   const Token &FilenameTok,
+                                   StringRef ResolvedFilename,
+                                   StringRef SearchPath, StringRef RelativePath,
+                                   const LexEmbedParametersResult &Params,
+                                   StringRef BinaryContents,
+                                   const size_t TargetCharWidth);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2ebd21090ae4e11..d3c62d8e75650eb 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5981,6 +5981,10 @@ class Sema final {
                         ArrayRef<Expr *> Arg, SourceLocation RParenLoc,
                         Expr *Config = nullptr, bool IsExecConfig = false,
                         ADLCallKind UsesADL = ADLCallKind::NotADL);
+  /// `Fn` may be a null pointer.
+  void ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                               SmallVectorImpl<Expr *> &ArgExprs,
+                               SourceLocation RParenLoc);
 
   ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                      MultiExprArg ExecConfig,
@@ -6098,6 +6102,35 @@ class Sema final {
                                 SourceLocation BuiltinLoc,
                                 SourceLocation RPLoc);
 
+  // __builtin_pp_embed()
+  ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+                              SourceLocation Base64DataLocation,
+                              SourceLocation RPLoc, StringLiteral *Filename,
+                              QualType DataTy, std::vector<char> BinaryData);
+
+  IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed);
+
+  PPEmbedExpr::Action
+  CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+                              std::optional<QualType> MaybeInitType);
+  PPEmbedExpr::Action
+  ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+                              SmallVectorImpl<Expr *> &OutputExprList,
+                              bool ClearOutputFirst = true);
+  PPEmbedExpr::Action
+  ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &OutputList);
+
+  enum PPEmbedExprContext {
+    PPEEC__StaticAssert,
+    PPEEC_StaticAssert,
+  };
+
+  StringRef GetLocationName(PPEmbedExprContext Context) const;
+
+  bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+                           PPEmbedExprContext Context,
+                           bool SingleAllowed = true);
+
   // Build a potentially resolved SourceLocExpr.
   ExprResult BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
                                 QualType ResultTy, SourceLocation BuiltinLoc,
@@ -8290,6 +8323,10 @@ class Sema final {
                                        SourceLocation EqualLoc,
                                        ParsedTemplateArgument DefaultArg);
 
+  void ModifyTemplateArguments(
+      const TemplateTy &Template,
+      SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs);
+
   TemplateParameterList *
   ActOnTemplateParameterList(unsigned Depth,
                              SourceLocation ExportLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5c32fbc079c9a65..138c52bc8149fc8 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1715,6 +1715,9 @@ enum StmtCode {
   /// A SourceLocExpr record.
   EXPR_SOURCE_LOC,
 
+  /// A PPEmbedExpr record.
+  EXPR_BUILTIN_PP_EMBED,
+
   /// A ShuffleVectorExpr record.
   EXPR_SHUFFLE_VECTOR,
 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 4bfc4f082cd6a69..f0c0359cd9feaf9 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2392,6 +2392,21 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
   llvm_unreachable("unhandled case");
 }
 
+PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy,
+                         StringLiteral *Filename, StringLiteral *BinaryData,
+                         SourceLocation BLoc, SourceLocation RParenLoc,
+                         DeclContext *ParentContext)
+    : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary),
+      BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext),
+      Filename(Filename), BinaryData(BinaryData) {
+  setDependence(ExprDependence::None);
+}
+
+size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const {
+  return getDataStringLiteral()->getByteLength() /
+         (Context.getTypeSize(getType()) / Context.getTypeSize(Context.CharTy));
+}
+
 InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
                            ArrayRef<Expr *> initExprs, SourceLocation rbraceloc)
     : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary),
@@ -3610,6 +3625,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case CXXUuidofExprClass:
   case OpaqueValueExprClass:
   case SourceLocExprClass:
+  case PPEmbedExprClass:
   case ConceptSpecializationExprClass:
   case RequiresExprClass:
   case SYCLUniqueStableNameExprClass:
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index ffa7c6802ea6e19..fbbbd72b1445716 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
   case Expr::RequiresExprClass:
     return Cl::CL_PRValue;
 
+  case Expr::PPEmbedExprClass:
+    // Nominally, this just goes through as a PRValue until we actually expand
+    // it and check it.
+    return Cl::CL_PRValue;
+
   // Make HLSL this reference-like
   case Expr::CXXThisExprClass:
     return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5539dedec02a4b..b6967cc97d78c5d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -8921,6 +8921,11 @@ class PointerExprEvaluator
     return true;
   }
 
+  bool VisitPPEmbedExpr(const PPEmbedExpr *E) {
+    // Unsupported for now: fail the evaluation instead of crashing clang.
+    return Error(E);
+  }
+
   bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
     std::string ResultStr = E->ComputeName(Info.Ctx);
 
@@ -16166,6 +16171,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
       return ICEDiag(IK_NotICE, E->getBeginLoc());
     return CheckICE(cast<CastExpr>(E)->getSubExpr(), Ctx);
   }
+  case Expr::PPEmbedExprClass: {
+    return ICEDiag(IK_ICE, E->getBeginLoc());
+  }
   }
 
   llvm_unreachable("Invalid StmtClass!");
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 23ec35cae4b7b40..f08fb766efd777d 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4721,6 +4721,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
   case Expr::PseudoObjectExprClass:
   case Expr::AtomicExprClass:
   case Expr::SourceLocExprClass:
+  case Expr::PPEmbedExprClass:
   case Expr::BuiltinBitCastExprClass:
   {
     NotPrimaryExpr();
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index a31aa0cfeeed8de..f94386be7788474 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -49,6 +49,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1145,6 +1146,12 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
   OS << Node->getBuiltinStr() << "()";
 }
 
+void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) {
+  OS << "__builtin_pp_embed(" << Node->getType() << ", "
+     << Node->getFilenameStringLiteral()->getBytes() << ", \""
+     << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")";
+}
+
 void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
   PrintExpr(Node->getSubExpr());
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 22b6855b0fff23c..0be044f54a819ee 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2284,6 +2284,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) {
   VisitExpr(E);
 }
 
+void StmtProfiler::VisitPPEmbedExpr(const PPEmbedExpr *E) { VisitExpr(E); }
+
 void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); }
 
 void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) {
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index e0e80b5e0fbedbe..d8a5b56438ad33d 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -549,7 +549,6 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
   if (MaybeLimit)
     FileSize = *MaybeLimit;
 
-
   // If there's a high enough chance that the file have changed since we
   // got its size, force a stat before opening it.
   if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d2b5426d27bb3b2..96ac3663ca6658b 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -422,8 +422,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // collisions (if there were, the switch below would complain about duplicate
   // case values).  Note that this depends on 'if' being null terminated.
 
-#define HASH(LEN, FIRST, THIRD) \
-  (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63)
+#define HASH(LEN, FIRST, THIRD)                                                \
+  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
 #define CASE(LEN, FIRST, THIRD, NAME) \
   case HASH(LEN, FIRST, THIRD): \
     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -438,7 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
-  CASE( 5, 'e', 'b', embed);
+  CASE(5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fc2f749a34fc471..53a92502b463b57 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1324,7 +1324,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group});
+                   options::OPT_F, options::OPT_index_header_map,
+                   options::OPT_EmbedPath_Group});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index e405a9085951dc0..0a3c16f3a669c70 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1399,8 +1399,7 @@ class AnnotatingParser {
       if (Tok->isOneOf(Keywords.kw___has_include,
                        Keywords.kw___has_include_next)) {
         parseHasInclude();
-      }
-      else if (Tok->is(Keywords.kw___has_embed)) {
+      } else if (Tok->is(Keywords.kw___has_embed)) {
         parseHasEmbed();
       }
       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 10558b1d34bf623..04ddb92ff7f7b67 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -65,11 +65,11 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
-  void EmbedDirective(SourceLocation HashLoc,
-                          StringRef FileName, bool IsAngled,
-                          CharSourceRange FilenameRange, CharSourceRange ParametersRange,
-                          OptionalFileEntryRef File, StringRef SearchPath,
-                          StringRef RelativePath) override {
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override {
     if (!File)
       DepCollector.maybeAddDependency(FileName,
                                       /*FromModule*/ false,
@@ -97,14 +97,13 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
   }
 
   void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
-                  OptionalFileEntryRef File) override {
+                OptionalFileEntryRef File) override {
     if (!File)
       return;
     StringRef Filename =
         llvm::sys::path::remove_leading_dotslash(File->getName());
     DepCollector.maybeAddDependency(Filename,
-                                    /*FromModule=*/false,
-                                    false,
+                                    /*FromModule=*/false, false,
                                     /*IsModuleFile=*/false,
                                     &PP.getFileManager(),
                                     /*IsMissing=*/false);
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 683f751a94244ec..4049a5245de7d34 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -53,7 +53,7 @@ class DependencyGraphCallback : public PPCallbacks {
   DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
                           StringRef SysRoot,
                           DirectiveBehavior Action = IgnoreEmbed)
-    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+      : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index fb9baa92e6836d3..1d93ad97305da87 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -107,9 +107,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
 public:
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
-                           bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives,
-                           bool UseLineDirectives, bool MinimizeWhitespace,
-                           bool DirectivesOnly, bool KeepSystemIncludes)
+                           bool defines, bool DumpIncludeDirectives,
+                           bool DumpEmbedDirectives, bool UseLineDirectives,
+                           bool MinimizeWhitespace, bool DirectivesOnly,
+                           bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
@@ -414,7 +415,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective(
   if (DumpEmbedDirectives) {
     MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
     *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
-       << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+        << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
     setEmittedDirectiveOnThisLine();
   }
 }
@@ -1002,8 +1003,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
-      Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+      Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+      Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
   // the majority of pragmas in such a file will be Microsoft pragmas.
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 7968c62cbd3e7b3..e2e55daa77b854a 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -566,6 +566,7 @@ class RuntimeInterfaceBuilder
         CStyleCastPtrExpr(S, Ctx.VoidPtrTy, (uintptr_t)Ty.getAsOpaquePtr());
     // The QualType parameter `OpaqueType`, represented as `void*`.
     Args.push_back(TypeArg);
+    S.ModifyCallExprArguments(nullptr, E->getBeginLoc(), Args, E->getEndLoc());
 
     // We push the last parameter based on the type of the Expr. Note we need
     // special care for rvalue struct.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index feed1b9ecd71a8d..b55b4c360d44298 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -417,6 +417,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
     }
   }
 
+  // NOTE: this is to prevent a few cases where token streams with
+  // commas are used to print with pseudo-locations after a faux-expansion
+  // cause reading a bogus location from a source file that does not exist.
+  if (Tok.is(tok::comma)) {
+    Buffer = ",";
+    return 1;
+  }
+
   // NOTE: this can be checked even after testing for an IdentifierInfo.
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e0d98d7ca03fa11..1696c1a40c3d46b 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -42,11 +42,13 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <algorithm>
 #include <cassert>
+#include <cmath>
 #include <cstring>
 #include <new>
 #include <optional>
@@ -3631,10 +3633,12 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
   SmallVector<Token, 2> ParameterTokens;
   tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
   Result.StartLoc = CurTok.getLocation();
+  Result.EndLoc = CurTok.getLocation();
   for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
     Parameter.clear();
     // Lex identifier [:: identifier ...]
     if (!CurTok.is(tok::identifier)) {
+      Result.EndLoc = CurTok.getEndLoc();
       Diag(CurTok, diag::err_expected) << "identifier";
       DiscardUntilEndOfDirective();
       return Result;
@@ -3647,6 +3651,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       Parameter.append("::");
       LexNonComment(CurTok);
       if (!CurTok.is(tok::identifier)) {
+        Result.EndLoc = CurTok.getEndLoc();
         Diag(CurTok, diag::err_expected) << "identifier";
         DiscardUntilEndOfDirective();
         return Result;
@@ -3670,25 +3675,19 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
         return Result;
       }
       const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
-      const bool ValueDoesNotFit =
-          LimitResult.getBitWidth() > 64
-              ? true
-              : (LimitResult.isUnsigned() ||
-                 (LimitResult.isSigned() && LimitResult.isNegative()));
-      if (ValueDoesNotFit) {
+      if (LimitResult.getBitWidth() > 64) {
         Diag(CurTok, diag::warn_pp_expr_overflow);
-        // just truncate and roll with that, I guess?
-        Result.MaybeLimitParam =
-            static_cast<size_t>(LimitResult.getRawData()[0]);
-      } else {
-        Result.MaybeLimitParam =
-            static_cast<size_t>(LimitResult.getZExtValue());
       }
+      size_t LimitValue = 0;
+      LimitValue = LimitResult.getLimitedValue();
+      Result.MaybeLimitParam = PPEmbedParameterLimit{
+          LimitValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
       LexNonComment(CurTok);
     } else if (Parameter == "clang::offset") {
       // we have a limit parameter and its internals are processed using
       // evaluation rules from #if - handle here
       if (CurTok.isNot(tok::l_paren)) {
+        Result.EndLoc = CurTok.getEndLoc();
         Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
         DiscardUntilEndOfDirective();
         return Result;
@@ -3697,18 +3696,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       DirectiveEvalResult OffsetEvalResult =
           EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
       if (!OffsetEvalResult.Value) {
+        Result.EndLoc = CurTok.getEndLoc();
         return Result;
       }
       const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value;
+      size_t OffsetValue;
       if (OffsetResult.getBitWidth() > 64) {
         Diag(CurTok, diag::warn_pp_expr_overflow);
-        // just truncate and roll with that, I guess?
-        Result.MaybeOffsetParam =
-            static_cast<size_t>(OffsetResult.getRawData()[0]);
-      } else {
-        Result.MaybeOffsetParam =
-            static_cast<size_t>(OffsetResult.getZExtValue());
       }
+      OffsetValue = OffsetResult.getLimitedValue();
+      Result.MaybeOffsetParam = PPEmbedParameterOffset{
+          OffsetValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()};
       LexNonComment(CurTok);
     } else {
       if (CurTok.is(tok::l_paren)) {
@@ -3764,6 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
           return true;
         };
         if (!ParseArgToken()) {
+          Result.EndLoc = CurTok.getEndLoc();
           return Result;
         }
         if (!CurTok.is(tok::r_paren)) {
@@ -3775,14 +3774,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
       }
       // "Token-soup" parameters
       if (Parameter == "if_empty") {
-        // TODO: integer list optimization
-        Result.MaybeIfEmptyParam = std::move(ParameterTokens);
+        Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else if (Parameter == "prefix") {
-        // TODO: integer list optimization
-        Result.MaybePrefixParam = std::move(ParameterTokens);
+        Result.MaybePrefixParam = PPEmbedParameterPrefix{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else if (Parameter == "suffix") {
-        // TODO: integer list optimization
-        Result.MaybeSuffixParam = std::move(ParameterTokens);
+        Result.MaybeSuffixParam = PPEmbedParameterSuffix{
+            std::move(ParameterTokens), ParameterStartTok.getLocation(),
+            CurTok.getLocation()};
       } else {
         ++Result.UnrecognizedParams;
         if (DiagnoseUnknown) {
@@ -3793,6 +3795,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
     }
   }
   Result.Successful = true;
+  Result.EndLoc = CurTok.getEndLoc();
   return Result;
 }
 
@@ -3823,89 +3826,327 @@ inline constexpr const char *IntegerLiterals[] = {
     "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
     "253", "254", "255"};
 
-void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc,
-                                              LexEmbedParametersResult &Params,
-                                              StringRef BinaryContents,
-                                              const size_t TargetCharWidth) {
-  (void)TargetCharWidth; // for later, when we support various sizes
-  size_t TokenIndex = 0;
-  const size_t InitListTokensSize = [&]() {
-    if (BinaryContents.empty()) {
-      if (Params.MaybeIfEmptyParam) {
-        return Params.MaybeIfEmptyParam->size();
+static size_t
+ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params,
+                        StringRef TypeName, StringRef BinaryContents,
+                        SmallVectorImpl<char> &TokSpellingBuffer) {
+  size_t ReserveSize = 0; // estimated character count of the generated text
+  if (BinaryContents.empty()) {
+    if (Params.MaybeIfEmptyParam) {
+      for (const auto &Tok : Params.MaybeIfEmptyParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size()) {
+          TokSpellingBuffer.resize(TokLen); // grow to the longest token seen
+        }
+        ReserveSize += TokLen;
+      }
+    }
+  } else {
+    if (Params.MaybePrefixParam) {
+      for (const auto &Tok : Params.MaybePrefixParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size()) {
+          TokSpellingBuffer.resize(TokLen);
+        }
+        ReserveSize += TokLen;
+      }
+    }
+    for (const unsigned char Byte : BinaryContents) { // unsigned: 128..255 ok
+      ReserveSize += 3 + TypeName.size(); // ((type-name)
+      if (Byte > 99) {
+        ReserveSize += 3; // ###
+      } else if (Byte > 9) {
+        ReserveSize += 2; // ##
       } else {
-        return static_cast<size_t>(0);
+        ReserveSize += 1; // #
       }
-    } else {
-      return static_cast<size_t>(
-          (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) +
-          (BinaryContents.size() * 2 - 1) +
-          (Params.MaybeSuffixParam ? Params.MaybeSuffixParam->size() : 0));
+      ReserveSize += 2; // ),
     }
-  }();
-  std::unique_ptr<Token[]> InitListTokens(new Token[InitListTokensSize]());
+    if (Params.MaybeSuffixParam) { // was a copy of the prefix loop
+      for (const auto &Tok : Params.MaybeSuffixParam->Tokens) {
+        const size_t TokLen = Tok.getLength();
+        if (TokLen > TokSpellingBuffer.size()) {
+          TokSpellingBuffer.resize(TokLen);
+        }
+        ReserveSize += TokLen;
+      }
+    }
+  }
+  return ReserveSize;
+}
 
+void Preprocessor::HandleEmbedDirectiveNaive(
+    SourceLocation HashLoc, SourceLocation FilenameLoc,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents,
+    const size_t TargetCharWidth) {
+  // Load up a new embed buffer for this file and set of parameters in
+  // particular.
+  EmbedBuffers.push_back("");
+  size_t EmbedBufferNumber = EmbedBuffers.size();
+  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
+  // Build an owning name: the old `("...", Number)` was a comma expression
+  // that dropped the prefix, and Twines are not meant to be kept in locals.
+  const std::string EmbedBufferName =
+      "<built-in:embed:" + EmbedBufferNumberVal + ">";
+  std::string &TargetEmbedBuffer = EmbedBuffers.back();
+  const size_t TotalSize = BinaryContents.size();
+  // In the future, this might change/improve.
+  const StringRef TypeName = "unsigned char";
+
+  SmallVector<char, 32> TokSpellingBuffer(32, 0);
+  const size_t ReserveSize = ComputeNaiveReserveSize(
+      Params, TypeName, BinaryContents, TokSpellingBuffer);
+  TargetEmbedBuffer.reserve(ReserveSize);
+
+  // Generate the look-alike source file
   if (BinaryContents.empty()) {
     if (Params.MaybeIfEmptyParam) {
-      std::copy(Params.MaybeIfEmptyParam->begin(),
-                Params.MaybeIfEmptyParam->end(), InitListTokens.get());
-      TokenIndex += Params.MaybeIfEmptyParam->size();
-      assert(TokenIndex == InitListTokensSize);
-      EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true,
-                       true);
+      const PPEmbedParameterIfEmpty &EmptyParam = *Params.MaybeIfEmptyParam;
+      for (const auto &Tok : EmptyParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
+    }
+  } else {
+    if (Params.MaybePrefixParam) {
+      const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
+      for (const auto &Tok : PrefixParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
+    }
+    for (size_t I = 0; I < TotalSize; ++I) {
+      unsigned char ByteValue = BinaryContents[I];
+      StringRef ByteRepresentation = IntegerLiterals[ByteValue];
+      TargetEmbedBuffer.append(2, '(');
+      TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+      TargetEmbedBuffer.append(1, ')');
+      TargetEmbedBuffer.append(ByteRepresentation.data(),
+                               ByteRepresentation.size());
+      TargetEmbedBuffer.append(1, ')');
+      bool AtEndOfContents = I == (TotalSize - 1);
+      if (!AtEndOfContents) {
+        TargetEmbedBuffer.append(1, ',');
+      }
+    }
+    if (Params.MaybeSuffixParam) {
+      const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
+      for (const auto &Tok : SuffixParam.Tokens) {
+        StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer);
+        TargetEmbedBuffer.append(Spelling.data(), Spelling.size());
+      }
     }
-    return;
   }
 
-  // FIXME: this does not take the target's byte size into account;
-  // will fail on many DSPs and embedded machines!
+  // Create faux-file and its ID, backed by a memory buffer.
+  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+  assert(EmbedBufferFID.isValid() &&
+         "Could not create FileID for #embed directive?");
+  // Start parsing the look-alike source file for the embed directive and
+  // pretend everything is normal
+  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
+}
+
+static bool TokenListIsCharacterArray(Preprocessor &PP,
+                                      const size_t TargetCharWidth,
+                                      bool IsPrefix,
+                                      const SmallVectorImpl<Token> &Tokens,
+                                      llvm::SmallVectorImpl<char> &Output) {
+  const bool IsSuffix = !IsPrefix;
+  const uint64_t MaxValue =
+      TargetCharWidth >= 64 ? UINT64_MAX : (uint64_t(1) << TargetCharWidth) - 1;
+  size_t TokenIndex = 0;
+  // if it's a suffix, we are expecting a comma first
+  // if it's a prefix, we are expecting a numeric literal first
+  bool ExpectingNumericLiteral = IsPrefix;
+  const size_t TokensSize = Tokens.size();
+  if (Tokens.empty()) {
+    return true;
+  }
+  for (; TokenIndex < TokensSize;
+       (void)++TokenIndex, ExpectingNumericLiteral = !ExpectingNumericLiteral) {
+    const Token &Tok = Tokens[TokenIndex];
+    // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the
+    // literals, since the Spec technically decrees each element is of type
+    // `unsigned char` (unless we have a potential future extension for
+    // `clang::type(meow)` as an embed parameter
+    if (ExpectingNumericLiteral) {
+      if (Tok.isNot(tok::numeric_constant)) {
+        return false;
+      }
+      uint64_t Value = {};
+      Token ParsingTok = Tok;
+      if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false)) {
+        // numeric literal is a floating point literal or a UDL; too complex for
+        // us
+        return false;
+      }
+      if (Value > MaxValue || Value > static_cast<uint64_t>(0xFF)) {
+        // number is too large
+        return false;
+      }
+      Output.push_back((char)Value);
+    } else {
+      if (Tok.isNot(tok::comma)) {
+        return false;
+      }
+    }
+  }
+  const bool EndedOnNumber = !ExpectingNumericLiteral;
+  if (IsPrefix && EndedOnNumber) {
+    // we ended on a number: this is a failure for prefix!
+    return false;
+  }
+  const bool EndedOnComma = ExpectingNumericLiteral;
+  if (IsSuffix && EndedOnComma) {
+    // we ended on a comma: this is a failure for suffix!
+    return false;
+  }
+  // if all tokens have been consumed by the above process, then we have
+  // succeeded.
+  return TokenIndex == TokensSize;
+}
+
+static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1,
+                               StringRef Bytes2, std::string &OutputBuffer) {
+  // Standard base64 alphabet; '=' is used as padding below.
+  static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                 "abcdefghijklmnopqrstuvwxyz"
+                                 "0123456789+/";
+  const size_t FirstEnd = Bytes0.size();
+  const size_t SecondEnd = FirstEnd + Bytes1.size();
+  const size_t InputSize = SecondEnd + Bytes2.size();
+  const size_t Base = OutputBuffer.size();
+  OutputBuffer.resize(Base + (((InputSize + 2) / 3) * 4));
+  // Reads byte `Pos` of the virtual concatenation Bytes0 + Bytes1 + Bytes2.
+  auto ByteAt = [&](size_t Pos) -> uint32_t {
+    if (Pos < FirstEnd)
+      return static_cast<unsigned char>(Bytes0[Pos]);
+    if (Pos < SecondEnd)
+      return static_cast<unsigned char>(Bytes1[Pos - FirstEnd]);
+    return static_cast<unsigned char>(Bytes2[Pos - SecondEnd]);
+  };
+  // Writes the 4-character group for the 24-bit value `Bits` at `Out`;
+  // the last `Pad` characters of the group become '='.
+  auto EmitGroup = [&](size_t Out, uint32_t Bits, unsigned Pad) {
+    OutputBuffer[Out + 0] = Alphabet[(Bits >> 18) & 63];
+    OutputBuffer[Out + 1] = Alphabet[(Bits >> 12) & 63];
+    OutputBuffer[Out + 2] = Pad >= 2 ? '=' : Alphabet[(Bits >> 6) & 63];
+    OutputBuffer[Out + 3] = Pad >= 1 ? '=' : Alphabet[Bits & 63];
+  };
+
+  const size_t WholeEnd = InputSize / 3 * 3;
+  size_t In = 0, Out = Base;
+  for (; In < WholeEnd; In += 3, Out += 4)
+    EmitGroup(Out, (ByteAt(In) << 16) | (ByteAt(In + 1) << 8) | ByteAt(In + 2),
+              0);
+  // Tail: one or two leftover bytes get a padded final group.
+  switch (InputSize - In) {
+  case 1:
+    EmitGroup(Out, ByteAt(In) << 16, 2);
+    break;
+  case 2:
+    EmitGroup(Out, (ByteAt(In) << 16) | (ByteAt(In + 1) << 8), 1);
+    break;
+  default:
+    break;
+  }
+}
+
+void Preprocessor::HandleEmbedDirectiveBuiltin(
+    SourceLocation HashLoc, const Token &FilenameTok,
+    StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents,
+    const size_t TargetCharWidth) {
+  // if it's empty, just process it like a normal expanded token stream
+  if (BinaryContents.empty()) {
+    HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                              BinaryContents, TargetCharWidth);
+    return;
+  }
+  SmallVector<char, 2> BinaryPrefix{};
+  SmallVector<char, 2> BinarySuffix{};
   if (Params.MaybePrefixParam) {
-    std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(),
-              InitListTokens.get() + TokenIndex);
-    TokenIndex += Params.MaybePrefixParam->size();
-  }
-  for (size_t I = 0; I < BinaryContents.size(); ++I) {
-    unsigned char ByteValue = BinaryContents[I];
-    StringRef ByteRepresentation = IntegerLiterals[ByteValue];
-    const size_t InitListIndex = TokenIndex;
-    Token &IntToken = InitListTokens[InitListIndex];
-    IntToken.setKind(tok::numeric_constant);
-    IntToken.setLiteralData(ByteRepresentation.data());
-    IntToken.setLength(ByteRepresentation.size());
-    IntToken.setLocation(FilenameLoc);
-    ++TokenIndex;
-    bool AtEndOfContents = I == (BinaryContents.size() - 1);
-    if (!AtEndOfContents) {
-      const size_t CommaInitListIndex = InitListIndex + 1;
-      Token &CommaToken = InitListTokens[CommaInitListIndex];
-      CommaToken.setKind(tok::comma);
-      CommaToken.setLocation(FilenameLoc);
-      ++TokenIndex;
+    // If we have a prefix, validate that it's a good fit for direct data
+    // embedding (and prepare to prepend it)
+    const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam;
+    if (!TokenListIsCharacterArray(*this, TargetCharWidth, true,
+                                   PrefixParam.Tokens, BinaryPrefix)) {
+      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                                BinaryContents, TargetCharWidth);
+      return;
     }
   }
   if (Params.MaybeSuffixParam) {
-    std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(),
-              InitListTokens.get() + TokenIndex);
-    TokenIndex += Params.MaybeSuffixParam->size();
+    // If we have a suffix, validate that it's a good fit for direct data
+    // embedding (and prepare to append it)
+    const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam;
+    if (!TokenListIsCharacterArray(*this, TargetCharWidth, false,
+                                   SuffixParam.Tokens, BinarySuffix)) {
+      HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params,
+                                BinaryContents, TargetCharWidth);
+      return;
+    }
   }
-  assert(TokenIndex == InitListTokensSize);
-  EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false);
-}
 
-void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc,
-                                               LexEmbedParametersResult &Params,
-                                               StringRef BinaryContents,
-                                               const size_t TargetCharWidth) {
-  // TODO: implement direct built-in support
-  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
-                             TargetCharWidth);
+  // Load up a new embed buffer for this file and set of parameters in
+  // particular.
+  EmbedBuffers.push_back("");
+  size_t EmbedBufferNumber = EmbedBuffers.size();
+  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
+  // Build an owning name: the old `("...", Number)` was a comma expression
+  // that dropped the prefix, and Twines are not meant to be kept in locals.
+  const std::string EmbedBufferName =
+      "<built-in:embed:" + EmbedBufferNumberVal + ">";
+  std::string &TargetEmbedBuffer = EmbedBuffers.back();
+  StringRef TypeName = "unsigned char";
+  const size_t TotalSize =
+      BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size();
+  const size_t ReserveSize =        // add up for necessary size:
+      19                            // __builtin_pp_embed(
+      + TypeName.size()             // type-name
+      + 2                           // ,"
+      + ResolvedFilename.size()     // file-name
+      + 3                           // ","
+      + (((TotalSize + 2) / 3) * 4) // base64-string
+      + 2                           // ")
+      ;
+  // Reserve appropriate size
+  TargetEmbedBuffer.reserve(ReserveSize);
+
+  // Generate the look-alike source file
+  TargetEmbedBuffer.append("__builtin_pp_embed(");
+  TargetEmbedBuffer.append(TypeName.data(), TypeName.size());
+  TargetEmbedBuffer.append(",\"");
+  TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size());
+  TargetEmbedBuffer.append("\",\"");
+  // include the prefix(...) and suffix(...) binary data in the total contents
+  TripleEncodeBase64(
+      StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents,
+      StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer);
+  TargetEmbedBuffer.append("\")");
+  // Create faux-file and its ID, backed by a memory buffer.
+  std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
+      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+  assert(EmbedMemBuffer && "Cannot create predefined source buffer");
+  FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
+  assert(EmbedBufferFID.isValid() &&
+         "Could not create FileID for #embed directive?");
+  // Start parsing the look-alike source file for the embed directive and
+  // pretend everything is normal
+  // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™.
+  EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false);
 }
 
 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                                         const FileEntry *LookupFromFile) {
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_embed
-                                          : diag::warn_cxx26_pp_embed);
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                          : diag::warn_c23_pp_embed);
     Diag(EmbedTok, EitherDiag);
   }
 
@@ -3952,18 +4193,16 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
       return;
     }
-    Diag(FilenameTok, diag::err_pp_file_not_found)
-        << Filename;
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
     return;
   }
   std::optional<int64_t> MaybeSignedLimit{};
   if (Params.MaybeLimitParam) {
-    if (static_cast<uint64_t>(INT64_MAX) >= *Params.MaybeLimitParam) {
-      MaybeSignedLimit = static_cast<int64_t>(*Params.MaybeLimitParam);
-    }
+    MaybeSignedLimit = static_cast<int64_t>(Params.MaybeLimitParam->Limit);
   }
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile = getFileManager().getBufferForFile(
-      *MaybeFileRef, false, false, MaybeSignedLimit);
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeFile =
+      getFileManager().getBufferForFile(*MaybeFileRef, false, false,
+                                        MaybeSignedLimit);
   if (!MaybeFile) {
     // could not find file
     Diag(FilenameTok, diag::err_cannot_open_file)
@@ -3973,7 +4212,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   StringRef BinaryContents = MaybeFile.get()->getBuffer();
   if (Params.MaybeOffsetParam) {
     // offsets all the way to the end of the file make for an empty file.
-    const size_t OffsetParam = *Params.MaybeOffsetParam;
+    const size_t &OffsetParam = Params.MaybeOffsetParam->Offset;
     BinaryContents = BinaryContents.substr(OffsetParam);
   }
   const size_t TargetCharWidth = getTargetInfo().getCharWidth();
@@ -4009,11 +4248,12 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                               RelativePath);
   }
   if (PPOpts->NoBuiltinPPEmbed) {
-    HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+    HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents,
                               TargetCharWidth);
   } else {
     // emit a token directly, handle it internally.
-    HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents,
+    HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath,
+                                RelativePath, Params, BinaryContents,
                                 TargetCharWidth);
   }
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 6e0163ccc89b7fb..7f6c964b0d68a3b 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1270,8 +1270,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
 int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   // pedwarn for not being on C23
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
-    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed
-                                          : diag::warn_cxx26_pp_has_embed);
+    auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+                                          : diag::warn_c23_pp_has_embed);
     Diag(Tok, EitherDiag);
   }
 
@@ -1321,7 +1321,8 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   SourceLocation FilenameLoc = Tok.getLocation();
   Token FilenameTok = Tok;
 
-  Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false);
+  Preprocessor::LexEmbedParametersResult Params =
+      this->LexEmbedParameters(Tok, true, false);
   if (!Params.Successful) {
     if (Tok.isNot(tok::eod))
       this->DiscardUntilEndOfDirective();
@@ -1339,7 +1340,6 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     return VALUE__STDC_EMBED_NOT_FOUND__;
   }
 
-
   SmallString<128> FilenameBuffer;
   SmallString<256> RelativePath;
   StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
@@ -1351,11 +1351,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   assert(!Filename.empty());
   const FileEntry *LookupFromFile =
       this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry()
-                               : nullptr;
+                                  : nullptr;
   OptionalFileEntryRef MaybeFileEntry =
       this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
-                            LookupFromFile, nullptr,
-                            &RelativePath);
+                            LookupFromFile, nullptr, &RelativePath);
   if (Callbacks) {
     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
   }
@@ -1363,11 +1362,15 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     return VALUE__STDC_EMBED_NOT_FOUND__;
   }
   size_t FileSize = MaybeFileEntry->getSize();
-  if (FileSize == 0 ||
-      (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) {
+  if (Params.MaybeLimitParam) {
+    if (FileSize > Params.MaybeLimitParam->Limit) {
+      FileSize = Params.MaybeLimitParam->Limit;
+    }
+  }
+  if (FileSize == 0) {
     return VALUE__STDC_EMBED_EMPTY__;
   }
-  if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) {
+  if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
     return VALUE__STDC_EMBED_EMPTY__;
   }
   return VALUE__STDC_EMBED_FOUND__;
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ede4c51487ffbe7..10eb6d268b37b1d 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1411,7 +1411,8 @@ bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
   return true;
 }
 
-bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
+bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value,
+                                             bool WithLex) {
   assert(Tok.is(tok::numeric_constant));
   SmallString<8> IntegerBuffer;
   bool NumberInvalid = false;
@@ -1426,7 +1427,8 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
   llvm::APInt APVal(64, 0);
   if (Literal.GetIntegerValue(APVal))
     return false;
-  Lex(Tok);
+  if (WithLex)
+    Lex(Tok);
   Value = APVal.getLimitedValue();
   return true;
 }
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 9dbfc1c8c5e9ffe..ef3ae580a43aeb9 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -32,6 +32,7 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Base64.h"
 #include <optional>
 using namespace clang;
 
@@ -741,6 +742,8 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 };
 }
 
+// clang-format off
+
 /// Parse a cast-expression, or, if \pisUnaryExpression is true, parse
 /// a unary-expression.
 ///
@@ -805,6 +808,7 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [GNU]   '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
 /// [GNU]   '__null'
@@ -924,6 +928,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 ///                   '__is_rvalue_expr'
 /// \endverbatim
 ///
+
+// clang-format on
+
 ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
                                        bool isAddressOfOperand,
                                        bool &NotCastExpr,
@@ -1345,6 +1352,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
   case tok::kw___builtin_FUNCSIG:
   case tok::kw___builtin_LINE:
   case tok::kw___builtin_source_location:
+  case tok::kw___builtin_pp_embed:
     if (NotPrimaryExpression)
       *NotPrimaryExpression = true;
     // This parses the complete suffix; we can return early.
@@ -2145,6 +2153,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
       } else {
         Expr *Fn = LHS.get();
         SourceLocation RParLoc = Tok.getLocation();
+        Actions.ModifyCallExprArguments(Fn, Loc, ArgExprs, RParLoc);
         LHS = Actions.ActOnCallExpr(getCurScope(), Fn, Loc, ArgExprs, RParLoc,
                                     ExecConfig);
         if (LHS.isInvalid()) {
@@ -2560,6 +2569,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   return Operand;
 }
 
+// clang-format off
+
 /// ParseBuiltinPrimaryExpression
 ///
 /// \verbatim
@@ -2575,6 +2586,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
 /// [MS]    '__builtin_FUNCSIG' '(' ')'
 /// [GNU]   '__builtin_LINE' '(' ')'
 /// [CLANG] '__builtin_COLUMN' '(' ')'
+/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')'
 /// [GNU]   '__builtin_source_location' '(' ')'
 /// [OCL]   '__builtin_astype' '(' assignment-expression ',' type-name ')'
 ///
@@ -2583,6 +2595,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
 /// [GNU]   offsetof-member-designator '.' identifier
 /// [GNU]   offsetof-member-designator '[' expression ']'
 /// \endverbatim
+
+// clang-format on
 ExprResult Parser::ParseBuiltinPrimaryExpression() {
   ExprResult Res;
   const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo();
@@ -2841,6 +2855,96 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen());
     break;
   }
+  case tok::kw___builtin_pp_embed: {
+    SourceRange DataTyExprSourceRange{};
+    TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
+
+    if (ExpectAndConsume(tok::comma)) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      Res = ExprError();
+    }
+
+    ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+
+    if (ExpectAndConsume(tok::comma)) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      Res = ExprError();
+    }
+
+    ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+
+    if (Tok.isNot(tok::r_paren)) {
+      Diag(Tok, diag::err_expected) << tok::r_paren;
+      Res = ExprError();
+    }
+
+    const ASTContext &Context = Actions.getASTContext();
+    QualType DataTy = Context.UnsignedCharTy;
+    size_t TargetWidth = Context.getTypeSize(DataTy);
+    if (DataTyExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      DataTy = DataTyExpr.get().get().getCanonicalType();
+      TargetWidth = Context.getTypeSize(DataTy);
+      if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+          DataTy.getUnqualifiedType() != Context.CharTy) {
+        // TODO: check if is exactly the same as unsigned char
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "only 'char' and 'unsigned char' are supported";
+        Res = ExprError();
+      }
+      if ((TargetWidth % CHAR_BIT) != 0) {
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "width of element type is not a multiple of host platform's "
+               "CHAR_BIT!";
+        Res = ExprError();
+      }
+    }
+
+    StringLiteral *FilenameLiteral = nullptr;
+    if (FilenameArgExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+    }
+
+    std::vector<char> BinaryData{};
+    if (Base64ArgExpr.isInvalid()) {
+      Res = ExprError();
+    } else {
+      StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+      StringRef Base64StrData = Base64Str->getBytes();
+      if (Base64Str->getKind() != StringLiteral::Ordinary) {
+        Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+            << 0
+            << "'__builtin_pp_embed' with valid base64 encoding that is an "
+               "ordinary \"...\" string";
+      }
+      const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+        Diag(Base64Str->getExprLoc(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "expected a valid base64 encoded string";
+      };
+      llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+      llvm::handleAllErrors(std::move(Err), OnDecodeError);
+      if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+        Diag(DataTyExprSourceRange.getBegin(),
+             diag::err_builtin_pp_embed_invalid_argument)
+            << "size of data does not split evently into the number of bytes "
+               "requested";
+        Res = ExprError();
+      }
+    }
+
+    if (!Res.isInvalid()) {
+      Res = Actions.ActOnPPEmbedExpr(
+          StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(),
+          FilenameLiteral, DataTy, std::move(BinaryData));
+    }
+    break;
+  }
   }
 
   if (Res.isInvalid())
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index f556d0e6d4f8b6e..8364519861fe4f3 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1671,6 +1671,8 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
     // arguments.
   } while (TryConsumeToken(tok::comma));
 
+  Actions.ModifyTemplateArguments(Template, TemplateArgs);
+
   return false;
 }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f249d41bc9bfbb6..44d8ddba080d82e 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13336,6 +13336,54 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     return;
   }
 
+  // Adjust the init expression for PPEmbedExpr as early as possible
+  // here.
+  bool AlreadyAdjustedPPEmbedExpr = false;
+  if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init); ILExpr) {
+    QualType VDeclTy = VDecl->getType();
+    ArrayRef<Expr *> Inits = ILExpr->inits();
+    if (CheckExprListForPPEmbedExpr(Inits, VDeclTy) == PPEmbedExpr::FoundOne) {
+      PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Inits[0]);
+      ILExpr->setInit(0, PPEmbed->getDataStringLiteral());
+      AlreadyAdjustedPPEmbedExpr = true;
+    }
+  }
+
+  if (!AlreadyAdjustedPPEmbedExpr) {
+    // If there is a PPEmbedExpr as a single initializer without braces,
+    // make sure it only produces a single element (and then expand said
+    // element).
+    if (PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(Init);
+        PPEmbed) {
+      if (PPEmbed->getDataElementCount(Context) == 1) {
+        // Expand the list in-place immediately, let the natural work take hold
+        Init = ExpandSinglePPEmbedExpr(PPEmbed);
+      } else {
+        // `__builtin_pp_embed( ... )` only produces 2 or more values.
+        Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type)
+            << "'__builtin_pp_embed'";
+        RealDecl->setInvalidDecl();
+        return;
+      }
+    }
+
+    // Legitimately, in all other cases, COMPLETELY nuke the PPEmbedExpr
+    // and turn it into a list of integers where applicable.
+    if (InitListExpr *ILExpr = dyn_cast_if_present<InitListExpr>(Init);
+        ILExpr) {
+      ArrayRef<Expr *> Inits = ILExpr->inits();
+      SmallVector<Expr *, 4> OutputExprList{};
+      if (ExpandPPEmbedExprInExprList(Inits, OutputExprList, false) ==
+          PPEmbedExpr::Expanded) {
+        ILExpr->resizeInits(Context, OutputExprList.size());
+        for (size_t I = 0; I < OutputExprList.size(); ++I) {
+          auto &InitExpr = OutputExprList[I];
+          ILExpr->setInit(I, InitExpr);
+        }
+      }
+    }
+  }
+
   // WebAssembly tables can't be used to initialise a variable.
   if (Init && !Init->getType().isNull() &&
       Init->getType()->isWebAssemblyTableType()) {
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index f9c010b1a002488..37321d2417a7d2e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17022,7 +17022,8 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
                                          SourceLocation RParenLoc) {
   if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
     return nullptr;
-
+  if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert))
+    return nullptr;
   return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
                                       AssertMessageExpr, RParenLoc, false);
 }
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 75730ea888afb41..ebeed7f4d2b485e 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1412,6 +1412,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Expr::SizeOfPackExprClass:
   case Expr::StringLiteralClass:
   case Expr::SourceLocExprClass:
+  case Expr::PPEmbedExprClass:
   case Expr::ConceptSpecializationExprClass:
   case Expr::RequiresExprClass:
     // These expressions can never throw.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cf45fc388083ce6..c10e6501daef6e2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7110,6 +7110,13 @@ static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S,
       << FixItHint::CreateInsertion(DRE->getLocation(), "std::");
 }
 
+void Sema::ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                                   SmallVectorImpl<Expr *> &ArgExprs,
+                                   SourceLocation RParenLoc) {
+  [[maybe_unused]] PPEmbedExpr::Action Action =
+      ExpandPPEmbedExprInExprList(ArgExprs);
+}
+
 ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                                MultiExprArg ArgExprs, SourceLocation RParenLoc,
                                Expr *ExecConfig) {
@@ -7947,8 +7954,17 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
     }
   }
 
-  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
-                                               RBraceLoc);
+  InitListExpr *E = nullptr;
+  if (InitArgList.size() > 1 &&
+      CheckExprListForPPEmbedExpr(InitArgList, std::nullopt) !=
+          PPEmbedExpr::NotFound) {
+    SmallVector<Expr *, 4> OutputExprList;
+    ExpandPPEmbedExprInExprList(InitArgList, OutputExprList);
+    E = new (Context)
+        InitListExpr(Context, LBraceLoc, OutputExprList, RBraceLoc);
+  } else {
+    E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
+  }
   E->setType(Context.VoidTy); // FIXME: just a place holder for now.
   return E;
 }
@@ -17570,6 +17586,225 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
       SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
 }
 
+ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
+                                  SourceLocation Base64DataLocation,
+                                  SourceLocation RPLoc, StringLiteral *Filename,
+                                  QualType ElementTy,
+                                  std::vector<char> BinaryData) {
+  uint64_t ArraySizeRawVal[] = {BinaryData.size()};
+  llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
+                                     1, ArraySizeRawVal));
+  QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
+                                                  ArrayType::Normal, 0);
+  StringLiteral *BinaryDataLiteral = StringLiteral::Create(
+      Context, StringRef(BinaryData.data(), BinaryData.size()),
+      StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation);
+  return new (Context)
+      PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
+                  RPLoc, CurContext);
+}
+
+/// Builds the IntegerLiteral for a PPEmbedExpr holding exactly one element.
+IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) {
+  assert(PPEmbed->getDataElementCount(Context) == 1 &&
+         "Data should only contain a single element");
+  StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+  QualType ElementTy = PPEmbed->getType();
+  const size_t TargetWidth = Context.getTypeSize(ElementTy);
+  // Bytes per element is the bit width over CHAR_BIT, not the reverse.
+  const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+  StringRef Data = DataLiteral->getBytes();
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+    if ((ValIndex % sizeof(uint64_t)) == 0)
+      ByteVals.push_back(0); // start a new word every sizeof(uint64_t) bytes
+    const unsigned char DataByte = Data[ValIndex];
+    ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                       << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+  }
+  return IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteVals),
+                                ElementTy, DataLiteral->getBeginLoc());
+}
+
+PPEmbedExpr::Action
+Sema::CheckExprListForPPEmbedExpr(ArrayRef<Expr *> ExprList,
+                                  std::optional<QualType> MaybeInitType) {
+  if (ExprList.empty()) {
+    return PPEmbedExpr::NotFound;
+  }
+  PPEmbedExpr *First = ExprList.size() == 1
+                           ? dyn_cast_if_present<PPEmbedExpr>(ExprList[0])
+                           : nullptr;
+  if (First) {
+    // only one and it's an embed
+    if (MaybeInitType) {
+      // With the type information, we have a duty to check if it matches;
+      // if not, explode it out into a list of integer literals.
+      QualType &InitType = *MaybeInitType;
+      if (InitType->isArrayType()) {
+        const ArrayType *InitArrayType = InitType->getAsArrayTypeUnsafe();
+        QualType InitElementTy = InitArrayType->getElementType();
+        QualType PPEmbedExprElementTy = First->getType();
+        const bool TypesMatch =
+            Context.typesAreCompatible(InitElementTy, PPEmbedExprElementTy) ||
+            (InitElementTy->isCharType() && PPEmbedExprElementTy->isCharType());
+        if (TypesMatch) {
+          // Keep the PPEmbedExpr, report that everything has been found.
+          return PPEmbedExpr::FoundOne;
+        }
+      }
+    } else {
+      // leave it, possibly adjusted later!
+      return PPEmbedExpr::FoundOne;
+    }
+  }
+  if (std::find_if(ExprList.begin(), ExprList.end(),
+                   [](const Expr *const SomeExpr) {
+                     return isa<PPEmbedExpr>(SomeExpr);
+                   }) == ExprList.end()) {
+    // We didn't find one.
+    return PPEmbedExpr::NotFound;
+  }
+  // Otherwise, we found one but it is not the sole entry in the initialization
+  // list.
+  return PPEmbedExpr::Expanded;
+}
+
+/// Replaces every PPEmbedExpr in \p ExprList, in place, with one IntegerLiteral
+/// per embedded element. Returns NotFound if the list held no PPEmbedExpr.
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(SmallVectorImpl<Expr *> &ExprList) {
+  PPEmbedExpr::Action Action = PPEmbedExpr::NotFound;
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t I = 0; I < ExprList.size();) {
+    Expr *&OriginalExpr = ExprList[I];
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(OriginalExpr);
+    if (!PPEmbed) {
+      ++I;
+      continue;
+    }
+    Action = PPEmbedExpr::Expanded;
+    auto ExprListIt = ExprList.erase(&OriginalExpr);
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0)
+      continue; // No ++I, we are already pointing to newest element.
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Bytes per element is the bit width over CHAR_BIT, not the reverse.
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    size_t Insertions = 0;
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0);
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        // Shift within the current 64-bit word, not from the element start.
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteVals),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      ExprListIt = ExprList.insert(ExprListIt, IntLit);
+      ++Insertions;
+      // Step past the new literal so the next insertion lands after it.
+      ++ExprListIt;
+    }
+    assert(Insertions == ExpectedDataElements);
+    I += Insertions;
+  }
+  // Report what actually happened rather than an unconditional Expanded.
+  return Action;
+}
+
+/// Appends \p ExprList to \p OutputExprList with each PPEmbedExpr expanded to
+/// IntegerLiterals; with no PPEmbedExpr present, appends nothing (NotFound).
+PPEmbedExpr::Action
+Sema::ExpandPPEmbedExprInExprList(ArrayRef<Expr *> ExprList,
+                                  SmallVectorImpl<Expr *> &OutputExprList,
+                                  bool ClearOutputFirst) {
+  if (ClearOutputFirst)
+    OutputExprList.clear();
+  size_t ExpectedResize = OutputExprList.size() + ExprList.size();
+  const auto FindPPEmbedExpr = [](const Expr *const SomeExpr) {
+    return isa<PPEmbedExpr>(SomeExpr);
+  };
+  if (std::find_if(ExprList.begin(), ExprList.end(), FindPPEmbedExpr) ==
+      ExprList.end())
+    return PPEmbedExpr::NotFound;
+  SmallVector<uint64_t, 4> ByteVals{};
+  OutputExprList.reserve(ExpectedResize);
+  for (size_t I = 0; I < ExprList.size(); ++I) {
+    Expr *OriginalExpr = ExprList[I];
+    PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(OriginalExpr);
+    if (!PPEmbed) {
+      OutputExprList.push_back(OriginalExpr);
+      continue;
+    }
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Bytes per element is the bit width over CHAR_BIT, not the reverse.
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0);
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        // Shift within the current 64-bit word, not from the element start.
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteVals),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      OutputExprList.push_back(IntLit);
+    }
+  }
+  return PPEmbedExpr::Expanded;
+}
+
+StringRef Sema::GetLocationName(PPEmbedExprContext Context) const {
+  switch (Context) {
+  default:
+    llvm_unreachable("unhandled PPEmbedExprContext value");
+  case PPEEC__StaticAssert:
+    return "_Static_assert";
+  case PPEEC_StaticAssert:
+    return "static_assert";
+  }
+}
+
+bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
+                               PPEmbedExprContext PPEmbedContext,
+                               bool SingleAllowed) {
+  PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
+  if (!PPEmbed)
+    return true; // not a PPEmbedExpr: nothing to diagnose
+  // NOTE(review): true means "no diagnostic"; confirm callers test !result.
+  if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
+    E = ExpandSinglePPEmbedExpr(PPEmbed);
+    return true; // E now refers to the single expanded integer literal
+  }
+  // Every other embed is rejected; SingleAllowed only selects the wording.
+  StringRef LocationName = GetLocationName(PPEmbedContext);
+  StringRef DiagnosticMessage =
+      (SingleAllowed ? "cannot use a preprocessor embed that expands to "
+                       "nothing or expands to "
+                       "more than one item in "
+                     : "cannot use a preprocessor embed in ");
+  Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
+      << DiagnosticMessage << 1 << LocationName;
+  return false; // a diagnostic was emitted
+}
+
 bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,
                                         bool Diagnose) {
   if (!getLangOpts().ObjC)
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index ff370dd1e080b2b..234e678c71b1401 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1623,6 +1623,62 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
   return Param;
 }
 
+/// Replaces each non-type template argument that is a PPEmbedExpr with one
+/// IntegerLiteral argument per embedded element, editing \p TemplateArgs in
+/// place. \p Template is not consulted.
+void Sema::ModifyTemplateArguments(
+    const TemplateTy &Template,
+    SmallVectorImpl<ParsedTemplateArgument> &TemplateArgs) {
+  SmallVector<uint64_t, 4> ByteVals{};
+  for (size_t I = 0; I < TemplateArgs.size();) {
+    ParsedTemplateArgument &OriginalArg = TemplateArgs[I];
+    if (OriginalArg.getKind() != ParsedTemplateArgument::NonType ||
+        !isa<PPEmbedExpr>(OriginalArg.getAsExpr())) {
+      ++I;
+      continue;
+    }
+    PPEmbedExpr *PPEmbed = cast<PPEmbedExpr>(OriginalArg.getAsExpr());
+    // Read the location now: erase() below shifts the following arguments into
+    // this slot, so OriginalArg no longer names the embed afterwards.
+    const SourceLocation ArgLoc = OriginalArg.getLocation();
+    auto TemplateArgListIt = TemplateArgs.erase(&OriginalArg);
+    const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context);
+    if (ExpectedDataElements == 0)
+      continue; // No ++I; already pointing at the right element!
+    StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral();
+    QualType ElementTy = PPEmbed->getType();
+    const size_t TargetWidth = Context.getTypeSize(ElementTy);
+    // Bytes per element is the bit width over CHAR_BIT, not the reverse.
+    const size_t BytesPerElement = TargetWidth / CHAR_BIT;
+    StringRef Data = DataLiteral->getBytes();
+    size_t Insertions = 0;
+    for (size_t ByteIndex = 0; ByteIndex < Data.size();
+         ByteIndex += BytesPerElement) {
+      ByteVals.clear();
+      for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) {
+        if ((ValIndex % sizeof(uint64_t)) == 0)
+          ByteVals.push_back(0);
+        const unsigned char DataByte = Data[ByteIndex + ValIndex];
+        // Shift within the current 64-bit word, not from the element start.
+        ByteVals.back() |= static_cast<uint64_t>(DataByte)
+                           << ((ValIndex % sizeof(uint64_t)) * CHAR_BIT);
+      }
+      IntegerLiteral *IntLit =
+          IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteVals),
+                                 ElementTy, DataLiteral->getBeginLoc());
+      TemplateArgListIt = TemplateArgs.insert(
+          TemplateArgListIt,
+          ParsedTemplateArgument(ParsedTemplateArgument::NonType, IntLit,
+                                 ArgLoc));
+      ++Insertions;
+      // Step past the new argument so the next insertion lands after it.
+      ++TemplateArgListIt;
+    }
+    assert(Insertions == ExpectedDataElements);
+    I += Insertions;
+  }
+}
+
 /// ActOnTemplateTemplateParameter - Called when a C++ template template
 /// parameter (e.g. T in template <template \<typename> class T> class array)
 /// has been parsed. S is the current scope.
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8fafdd4f5caa1ed..ed5a03393d4adb5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12127,6 +12127,12 @@ ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
                                            getSema().CurContext);
 }
 
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformPPEmbedExpr(PPEmbedExpr *E) {
+  // TODO: fully implement for tree transformations
+  return E;
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 1bdc3fa3bea455a..9acf786cf3cc463 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1297,6 +1297,15 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) {
       static_cast<SourceLocExpr::IdentKind>(Record.readInt());
 }
 
+void ASTStmtReader::VisitPPEmbedExpr(PPEmbedExpr *E) {
+  VisitExpr(E);
+  E->ParentContext = readDeclAs<DeclContext>();
+  E->BuiltinLoc = readSourceLocation();
+  E->RParenLoc = readSourceLocation();
+  E->Filename = cast<StringLiteral>(Record.readSubStmt());
+  E->BinaryData = cast<StringLiteral>(Record.readSubStmt());
+}
+
 void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   E->setAmpAmpLoc(readSourceLocation());
@@ -3121,6 +3130,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = new (Context) SourceLocExpr(Empty);
       break;
 
+    case EXPR_BUILTIN_PP_EMBED:
+      S = new (Context) PPEmbedExpr(Empty);
+      break;
+
     case EXPR_ADDR_LABEL:
       S = new (Context) AddrLabelExpr(Empty);
       break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 125ca17c0c1212e..482daabe30f8349 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1169,6 +1169,16 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) {
   Code = serialization::EXPR_SOURCE_LOC;
 }
 
+void ASTStmtWriter::VisitPPEmbedExpr(PPEmbedExpr *E) {
+  VisitExpr(E);
+  Record.AddDeclRef(cast_or_null<Decl>(E->getParentContext()));
+  Record.AddSourceLocation(E->getBeginLoc());
+  Record.AddSourceLocation(E->getEndLoc());
+  Record.AddStmt(E->getFilenameStringLiteral());
+  Record.AddStmt(E->getDataStringLiteral());
+  Code = serialization::EXPR_BUILTIN_PP_EMBED;
+}
+
 void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   Record.AddSourceLocation(E->getAmpAmpLoc());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 451ee91b94533d5..70347fb9ffb2ca7 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2411,6 +2411,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       Bldr.addNodes(Dst);
       break;
     }
+
+    case Stmt::PPEmbedExprClass:
+      llvm_unreachable("Support for PPEmbedExpr is not implemented.");
+      break;
   }
 }
 
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 000000000000000..1639fb7af7f07b0
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data) == 274, "");
+_Static_assert(' ' == data[0], "");
+_Static_assert('_' == data[11], "");
+_Static_assert('\n' == data[273], "");
+_Static_assert(sizeof(data2) == 275, "");
+_Static_assert(' ' == data2[0], "");
+_Static_assert('_' == data2[11], "");
+_Static_assert('\n' == data2[273], "");
+_Static_assert('\0' == data2[274], "");
+_Static_assert(sizeof(data3) == 275, "");
+_Static_assert(' ' == data3[0], "");
+_Static_assert('_' == data3[11], "");
+_Static_assert('\n' == data3[273], "");
+_Static_assert('\0' == data3[274], "");
+_Static_assert(sizeof(data4) == 275, "");
+_Static_assert(' ' == data4[0], "");
+_Static_assert('_' == data4[11], "");
+_Static_assert('\n' == data4[273], "");
+_Static_assert('\0' == data4[274], "");
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+_Static_assert(sizeof(data5) == 274, "");
+_Static_assert(' ' == data5[0], "");
+_Static_assert('_' == data5[11], "");
+_Static_assert('\n' == data5[273], "");
+_Static_assert(sizeof(data6) == 275, "");
+_Static_assert(' ' == data6[0], "");
+_Static_assert('_' == data6[11], "");
+_Static_assert('\n' == data6[273], "");
+_Static_assert('\0' == data6[274], "");
+_Static_assert(sizeof(data7) == 275, "");
+_Static_assert(' ' == data7[0], "");
+_Static_assert('_' == data7[11], "");
+_Static_assert('\n' == data7[273], "");
+_Static_assert('\0' == data7[274], "");
+_Static_assert(sizeof(data8) == 275, "");
+_Static_assert(' ' == data8[0], "");
+_Static_assert('_' == data8[11], "");
+_Static_assert('\n' == data8[273], "");
+_Static_assert('\0' == data8[274], "");
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+_Static_assert(sizeof(data9) == 274, "");
+_Static_assert(' ' == data9[0], "");
+_Static_assert('_' == data9[11], "");
+_Static_assert('\n' == data9[273], "");
+_Static_assert(sizeof(data10) == 275, "");
+_Static_assert(' ' == data10[1], "");
+_Static_assert('_' == data10[12], "");
+_Static_assert('\n' == data10[274], "");
+_Static_assert('\0' == data10[0], "");
+_Static_assert(sizeof(data11) == 275, "");
+_Static_assert(' ' == data11[1], "");
+_Static_assert('_' == data11[12], "");
+_Static_assert('\n' == data11[274], "");
+_Static_assert('\0' == data11[0], "");
+_Static_assert(sizeof(data12) == 275, "");
+_Static_assert(' ' == data12[1], "");
+_Static_assert('_' == data12[12], "");
+_Static_assert('\n' == data12[274], "");
+_Static_assert('\0' == data12[0], "");
+
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 000000000000000..3be4e1c2a6cf870
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+const char data =
+#embed "single_byte.txt"
+;
+_Static_assert('a' == data[0]);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 000000000000000..5971a75ee000bbf
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -x c %s -fsyntax-only -embed-dir=%S/Inputs -verify
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;}
+void g () {}
+void h (unsigned char x, int y) {(void)x; (void)y;}
+int i () {
+	return
+#embed <single_byte.txt>
+		;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus
+template <int First, int Second>
+void j() {
+	static_assert(First == 'j', "");
+	static_assert(Second == 'k', "");
+}
+#endif
+
+void do_stuff() {
+	f(
+#embed <single_byte.txt>
+	);
+	g(
+#embed <media/empty>
+	);
+	h(
+#embed <jk.txt>
+	);
+	int r = i();
+	(void)r;
+#ifdef __cplusplus
+	j<
+#embed <jk.txt>
+	>(
+#embed <media/empty>
+	);
+#endif
+}
+// expected-no-diagnostics
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 3d96884749b32f4..8fcef706e916733 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -20,37 +20,43 @@
 
 namespace llvm {
 
-template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+template <class InputBytes, class OutputContainer>
+void encodeBase64(InputBytes const &Bytes, OutputContainer &OutputBuffer) {
   static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "abcdefghijklmnopqrstuvwxyz"
                               "0123456789+/";
-  std::string Buffer;
-  Buffer.resize(((Bytes.size() + 2) / 3) * 4);
+  const std::size_t IndexOffset = OutputBuffer.size();
+  OutputBuffer.resize(OutputBuffer.size() + (((Bytes.size() + 2) / 3) * 4));
 
   size_t i = 0, j = 0;
   for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) {
     uint32_t x = ((unsigned char)Bytes[i] << 16) |
                  ((unsigned char)Bytes[i + 1] << 8) |
                  (unsigned char)Bytes[i + 2];
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = Table[(x >> 6) & 63];
-    Buffer[j + 3] = Table[x & 63];
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+    OutputBuffer[IndexOffset + j + 3] = Table[x & 63];
   }
   if (i + 1 == Bytes.size()) {
     uint32_t x = ((unsigned char)Bytes[i] << 16);
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = '=';
-    Buffer[j + 3] = '=';
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = '=';
+    OutputBuffer[IndexOffset + j + 3] = '=';
   } else if (i + 2 == Bytes.size()) {
     uint32_t x =
         ((unsigned char)Bytes[i] << 16) | ((unsigned char)Bytes[i + 1] << 8);
-    Buffer[j + 0] = Table[(x >> 18) & 63];
-    Buffer[j + 1] = Table[(x >> 12) & 63];
-    Buffer[j + 2] = Table[(x >> 6) & 63];
-    Buffer[j + 3] = '=';
+    OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63];
+    OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63];
+    OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63];
+    OutputBuffer[IndexOffset + j + 3] = '=';
   }
+}
+
+template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+  std::string Buffer;
+  encodeBase64(Bytes, Buffer);
   return Buffer;
 }
 

>From 77aad07644b135196511dfe1d60bc08617e9d72b Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 10:31:54 -0500
Subject: [PATCH 03/23] Update based on API changes in community

---
 clang/lib/Parse/ParseExpr.cpp | 2 +-
 clang/lib/Sema/SemaExpr.cpp   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 25da32ee0b88c1d..4c96b62b3e323bd 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2918,7 +2918,7 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     } else {
       StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
       StringRef Base64StrData = Base64Str->getBytes();
-      if (Base64Str->getKind() != StringLiteral::Ordinary) {
+      if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
         Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
             << 0
             << "'__builtin_pp_embed' with valid base64 encoding that is an "
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index be1f22bc93dde7f..87626d6af05d7d0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17609,10 +17609,10 @@ ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc,
   llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()),
                                      1, ArraySizeRawVal));
   QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr,
-                                                  ArrayType::Normal, 0);
+                                                  ArraySizeModifier::Normal, 0);
   StringLiteral *BinaryDataLiteral = StringLiteral::Create(
       Context, StringRef(BinaryData.data(), BinaryData.size()),
-      StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation);
+      StringLiteralKind::Ordinary, false, ArrayTy, Base64DataLocation);
   return new (Context)
       PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc,
                   RPLoc, CurContext);

>From 1cca72573478b5572d10721e9c94f2aea2d7e394 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:14:26 -0500
Subject: [PATCH 04/23] We don't yet expose a libclang cursor for embed
 expressions

---
 clang/tools/libclang/CXCursor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index fd03c48ba1a42aa..08f5830afaa9625 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::ObjCSubscriptRefExprClass:
   case Stmt::RecoveryExprClass:
   case Stmt::SYCLUniqueStableNameExprClass:
+  case Stmt::PPEmbedExprClass:
     K = CXCursor_UnexposedExpr;
     break;
 

>From cd6142dc5899dd55ca693665ea313521db750d74 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:24 -0500
Subject: [PATCH 05/23] Update preprocessor tests for new builtin macros

---
 clang/test/Preprocessor/init-aarch64.c | 3 +++
 clang/test/Preprocessor/init.c         | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 2b7cc57f2303333..b666fa99f39b42f 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -262,6 +262,9 @@
 // AARCH64-NEXT: #define __SIZE_WIDTH__ 64
 // AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
 // AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
 // AARCH64-NEXT: #define __STDC_HOSTED__ 1
 // AARCH64-NEXT: #define __STDC_UTF_16__ 1
 // AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index a0a2879cb58c7fc..0f728a69c34e561 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1797,6 +1797,9 @@
 // WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
 // WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
 // WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
 // WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
 // WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__

>From 495f1d49d3e88c294be43e752ef699c267f67f8c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 11:40:52 -0500
Subject: [PATCH 06/23] Fix logical think-o with the test

---
 clang/test/Preprocessor/embed_single_entity.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
index 3be4e1c2a6cf870..8cbee2a93626152 100644
--- a/clang/test/Preprocessor/embed_single_entity.c
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 -embed-dir=%S/Inputs -verify
 
 const char data =
 #embed "single_byte.txt"
 ;
-_Static_assert('a' == data[0]);
+_Static_assert('b' == data);
 // expected-no-diagnostics

>From 680c3798811c3df1c8e92181a79655b24349ebce Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:05:09 -0500
Subject: [PATCH 07/23] Fix -Wreorder diagnostics; NFC

---
 clang/include/clang/Lex/PPEmbedParameters.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index 7b76d2d573c23bd..dfc835ecfc835af 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -27,7 +27,7 @@ class PPEmbedParameterOffset : public PPDirectiveParameter {
 
   PPEmbedParameterOffset(size_t Offset, SourceLocation Start,
                          SourceLocation End)
-      : Offset(Offset), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Offset(Offset) {}
 };
 
 /// Preprocessor standard embed parameter "limit"
@@ -37,7 +37,7 @@ class PPEmbedParameterLimit : public PPDirectiveParameter {
   size_t Limit;
 
   PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End)
-      : Limit(Limit), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Limit(Limit) {}
 };
 
 /// Preprocessor standard embed parameter "prefix"
@@ -48,7 +48,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
 
   PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
                          SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 /// Preprocessor standard embed parameter "suffix"
@@ -59,7 +59,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
 
   PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
                          SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 /// Preprocessor standard embed parameter "if_empty"
@@ -70,7 +70,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
 
   PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
                           SourceLocation End)
-      : Tokens(std::move(Tokens)), PPDirectiveParameter(Start, End) {}
+      : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
 
 } // end namespace clang

>From a0f8278db25809e3fc397edaac909ef809931567 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 13:15:37 -0500
Subject: [PATCH 08/23] Clean up these constructors to take a SmallVectorImpl

This way we're not tied to a SmallVector<Token, 2> specifically in callers.
---
 clang/include/clang/Lex/PPEmbedParameters.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
index dfc835ecfc835af..f6de84bdc915148 100644
--- a/clang/include/clang/Lex/PPEmbedParameters.h
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -46,7 +46,7 @@ class PPEmbedParameterPrefix : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterPrefix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterPrefix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                          SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
@@ -57,7 +57,7 @@ class PPEmbedParameterSuffix : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterSuffix(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterSuffix(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                          SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };
@@ -68,7 +68,7 @@ class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
 public:
   SmallVector<Token, 2> Tokens;
 
-  PPEmbedParameterIfEmpty(SmallVector<Token, 2> Tokens, SourceLocation Start,
+  PPEmbedParameterIfEmpty(SmallVectorImpl<Token> &&Tokens, SourceLocation Start,
                           SourceLocation End)
       : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {}
 };

>From 4d9ed9e2f4bd27013681461edda6768ebbb7aaa1 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:13:32 -0500
Subject: [PATCH 09/23] Fix a crash with argument parsing

If the user passes -fno-builtin, then the call to getValue() will
assert due to an out-of-bounds access. So we check to see which form
the user passed (-fno-builtin or -fno-builtin-pp_embed).

Additionally, we need to round trip the argument properly depending on
which form the user passed.
---
 clang/lib/Frontend/CompilerInvocation.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index ce1341421bab694..6660a116dad8022 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4330,8 +4330,14 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   for (const auto &EmbedEntry : Opts.EmbedEntries)
     GenerateArg(Consumer, OPT_embed_dir, EmbedEntry);
 
-  if (Opts.NoBuiltinPPEmbed)
-    GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+  if (Opts.NoBuiltinPPEmbed) {
+    // We need to figure out whether the user passed -fno-builtin or
+    // specifically disabled pp_embed. If NoBuiltin is true, we don't need to
+    // generate an arg because that disables everything. Otherwise, we assume
+    // the user passed -fno-builtin-pp_embed and generate that.
+    if (!LangOpts.NoBuiltin)
+      GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed");
+  }
 
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
@@ -4432,10 +4438,12 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
 
   // Can disable the internal embed builtin / token
   for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) {
-    StringRef Val = A->getValue();
-    if (Val == "pp_embed") {
-      Opts.NoBuiltinPPEmbed = true;
-    }
+    bool NoBuiltinEmbed = false;
+    if (A->getNumValues())
+      NoBuiltinEmbed = A->getValue() == StringRef("pp_embed");
+    else
+      NoBuiltinEmbed = true; // All builtins are disabled.
+    Opts.NoBuiltinPPEmbed = NoBuiltinEmbed;
   }
 
   // Always avoid lexing editor placeholders when we're just running the

>From 8a466f3354cbf862a3bc1edd71c32289f337ebb0 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:55:36 -0500
Subject: [PATCH 10/23] Back out unrelated CMake changes

---
 clang/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 1b88905da3b8597..9b52c58be41e7f7 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -300,7 +300,6 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc)
 
 # Add appropriate flags for GCC
-option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual")
   if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -308,7 +307,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
   endif ()
 
   # Enable -pedantic for Clang even if it's not enabled for LLVM.
-  if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC)
+  if (NOT LLVM_ENABLE_PEDANTIC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long")
   endif ()
 

>From a3d4b13f9dbd9d11bbd8f619de3ac888a880bf82 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 14:57:50 -0500
Subject: [PATCH 11/23] Remove a spurious #undef; NFC

---
 clang/include/clang/Basic/TokenKinds.def | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 6b726463f0cdd31..613f6d64eb8bdc9 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -991,7 +991,6 @@ ANNOTATION(repl_input_end)
 #undef CXX11_KEYWORD
 #undef KEYWORD
 #undef PUNCTUATOR
-#undef BUILTINOK
 #undef TOK
 #undef C99_KEYWORD
 #undef C23_KEYWORD

>From 7dad1be74cc40cbb1694d58e8f7553c8741634ec Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Tue, 7 Nov 2023 15:12:17 -0500
Subject: [PATCH 12/23] Backing out more unnecessary CMake changes

---
 llvm/CMakeLists.txt                    | 7 -------
 llvm/cmake/modules/GetHostTriple.cmake | 6 +++---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cb049ccb7d9c8cd..7ff3acd48304de7 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -780,13 +780,6 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
     "Semicolon-separated list of components to include in libLLVM, or \"all\".")
 endif()
 
-option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON)
-# Quiet down MSVC-style secure CRT warnings
-if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
-  add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
-endif()
-
-
 if(MSVC)
   option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
   # Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 828227f2f25a2f0..1be13bc01ab9b25 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
 # Invokes config.guess
 
 function( get_host_triple var )
-  if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
+  if( MSVC )
     if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
       set( value "aarch64-pc-windows-msvc" )
     elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
     else()
       set( value "powerpc-ibm-aix" )
     endif()
-  else()
+  else( MSVC )
     if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
       message(WARNING "unable to determine host target triple")
     else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
       endif( NOT TT_RV EQUAL 0 )
       set( value ${TT_OUT} )
     endif()
-  endif()
+  endif( MSVC )
   set( ${var} ${value} PARENT_SCOPE )
 endfunction( get_host_triple var )

>From 29ac376978331a6453575004814cb8e9364bd933 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:16:21 -0500
Subject: [PATCH 13/23] Correct the logic for this diagnostic checking function

This fixes a few hundred failing test cases for me; still several left
failing though.
---
 clang/lib/Sema/SemaExpr.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 87626d6af05d7d0..c932abf8d931906 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17801,11 +17801,11 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
                                bool SingleAllowed) {
   PPEmbedExpr *PPEmbed = dyn_cast_if_present<PPEmbedExpr>(E);
   if (!PPEmbed)
-    return true;
+    return false;
 
   if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) {
     E = ExpandSinglePPEmbedExpr(PPEmbed);
-    return true;
+    return false;
   }
 
   StringRef LocationName = GetLocationName(PPEmbedContext);
@@ -17816,7 +17816,7 @@ bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation,
                      : "cannot use a preprocessor embed in ");
   Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location)
       << DiagnosticMessage << 1 << LocationName;
-  return false;
+  return true;
 }
 
 bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp,

>From e4e28eb990098d8a203013d946dd5a4243a8fb0f Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 07:38:23 -0500
Subject: [PATCH 14/23] Fix think-o with test to get it to pass

---
 clang/test/Preprocessor/embed_path_quote.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
index 791cd9176ebe0ab..7e39d9be3b0a523 100644
--- a/clang/test/Preprocessor/embed_path_quote.c
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -4,5 +4,5 @@ const char data[] = {
 #embed "single_byte.txt"
 };
 _Static_assert(sizeof(data) == 1, "");
-_Static_assert('a' == data[0], "");
+_Static_assert('b' == data[0], "");
 // expected-no-diagnostics

>From ab5f8c204d03bab9bd516c299a478b0d72467b01 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 08:50:21 -0500
Subject: [PATCH 15/23] Restore previous behavior; fixes two more failing test
 cases

---
 clang/lib/Lex/PPExpressions.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index dda5717afc699da..e0bd73e8680921c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -935,10 +935,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    const bool IsNonZero = ResVal.Val != 0;
     const SourceRange ValRange = ResVal.getRange();
-    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
-            ValRange};
+    return {std::move(ResVal.Val), false, DT.IncludedUndefinedIds, ValRange};
   }
 
   if (CheckForEoD) {

>From 9d5eadfc04ed7276bab79321294b6bff4f35bb85 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:37:59 -0500
Subject: [PATCH 16/23] Clean up the way we expose the __STDC_EMBED_*__ macros;
 NFC

---
 clang/include/clang/Lex/Preprocessor.h  | 11 +++++----
 clang/lib/Frontend/InitPreprocessor.cpp |  9 +++++---
 clang/lib/Lex/PPMacroExpansion.cpp      | 30 ++++++++++++-------------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index ea461aba0611f0f..8db920ad2dc6610 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -122,6 +122,12 @@ enum MacroUse {
   MU_Undef  = 2
 };
 
+enum class EmbedResult {
+  NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
+  Found = 1,    // Corresponds to __STDC_EMBED_FOUND__
+  Empty = 2,    // Corresponds to __STDC_EMBED_EMPTY__
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -211,9 +217,6 @@ class Preprocessor {
   enum {
     /// Maximum depth of \#includes.
     MaxAllowedIncludeStackDepth = 200,
-    VALUE__STDC_EMBED_NOT_FOUND__ = 0,
-    VALUE__STDC_EMBED_FOUND__ = 1,
-    VALUE__STDC_EMBED_EMPTY__ = 2,
   };
 
   // State that is set before the preprocessor begins.
@@ -2584,7 +2587,7 @@ class Preprocessor {
   ///
   /// Returns predefined `__STDC_EMBED_*` macro values if
   /// successful.
-  int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
+  EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
 
   /// Process a '__has_include("path")' expression.
   ///
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b7d084773b0a195..cc9c6733f442968 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -499,9 +499,12 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__STDC_UTF_32__", "1");
 
   // __has_embed definitions
-  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0");
-  Builder.defineMacro("__STDC_EMBED_FOUND__", "1");
-  Builder.defineMacro("__STDC_EMBED_EMPTY__", "2");
+  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+  Builder.defineMacro("__STDC_EMBED_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Found)));
+  Builder.defineMacro("__STDC_EMBED_EMPTY__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Empty)));
 
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b25faf8c873d389..a55bc719328ad62 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1272,7 +1272,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
 
 /// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
 /// Returns a filled optional with the value if successful; otherwise, empty.
-int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   // pedwarn for not being on C23
   if (!LangOpts.C23 || !LangOpts.CPlusPlus26) {
     auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
@@ -1290,13 +1290,13 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     // Return a valid identifier token.
     assert(Tok.is(tok::identifier));
     Tok.setIdentifierInfo(II);
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   // Get '('. If we don't have a '(', try to form a header-name token.
   do {
     if (this->LexHeaderName(Tok)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   } while (Tok.getKind() == tok::comment);
 
@@ -1308,19 +1308,19 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     // If the next token looks like a filename or the start of one,
     // assume it is and process it as such.
     if (Tok.isNot(tok::header_name)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   } else {
     // Save '(' location for possible missing ')' message.
     LParenLoc = Tok.getLocation();
     if (this->LexHeaderName(Tok)) {
-      return VALUE__STDC_EMBED_NOT_FOUND__;
+      return EmbedResult::NotFound;
     }
   }
 
   if (Tok.isNot(tok::header_name)) {
     Diag(Tok.getLocation(), diag::err_pp_expects_filename);
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   SourceLocation FilenameLoc = Tok.getLocation();
@@ -1331,10 +1331,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
   if (!Params.Successful) {
     if (Tok.isNot(tok::eod))
       this->DiscardUntilEndOfDirective();
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
   if (Params.UnrecognizedParams > 0) {
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   if (!Tok.is(tok::r_paren)) {
@@ -1342,7 +1342,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
         << II << tok::r_paren;
     Diag(LParenLoc, diag::note_matching) << tok::l_paren;
     DiscardUntilEndOfDirective();
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
 
   SmallString<128> FilenameBuffer;
@@ -1364,7 +1364,7 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
   }
   if (!MaybeFileEntry) {
-    return VALUE__STDC_EMBED_NOT_FOUND__;
+    return EmbedResult::NotFound;
   }
   size_t FileSize = MaybeFileEntry->getSize();
   if (Params.MaybeLimitParam) {
@@ -1373,12 +1373,12 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     }
   }
   if (FileSize == 0) {
-    return VALUE__STDC_EMBED_EMPTY__;
+    return EmbedResult::Empty;
   }
   if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) {
-    return VALUE__STDC_EMBED_EMPTY__;
+    return EmbedResult::Empty;
   }
-  return VALUE__STDC_EMBED_FOUND__;
+  return EmbedResult::Found;
 }
 
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
@@ -1923,11 +1923,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     // file name string literal using angle brackets (<>) or
     // double-quotes (""), optionally followed by a series of
     // arguments similar to form like attributes.
-    int Value = EvaluateHasEmbed(Tok, II);
+    EmbedResult Value = EvaluateHasEmbed(Tok, II);
 
     if (Tok.isNot(tok::r_paren))
       return;
-    OS << Value;
+    OS << static_cast<int>(Value);
     Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.

>From f88a1aec9865fdd3cb44aaa45d4d141a6195854e Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 09:51:08 -0500
Subject: [PATCH 17/23] Fix a broken pp-trace test

The test needs to care about the three new predefined macros.
---
 clang-tools-extra/test/pp-trace/pp-trace-macro.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
index 1d85607e86b7fff..7c2a231101070d7 100644
--- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
+++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
@@ -31,6 +31,15 @@ X
 // CHECK:        MacroNameTok: __STDC_UTF_32__
 // CHECK-NEXT:   MacroDirective: MD_Define
 // CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_NOT_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_EMPTY__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
 // CHECK:      - Callback: MacroDefined
 // CHECK-NEXT:   MacroNameTok: MACRO
 // CHECK-NEXT:   MacroDirective: MD_Define

>From e7ef292e0e61591eaf3bda238265f45a3e468e48 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 11:56:35 -0500
Subject: [PATCH 18/23] Remove __builtin_pp_embed as a builtin function; NFC

This is a weird builtin function that's more like __builtin_offsetof
in that it takes a type argument. Therefore, it's not really a function
call like other builtins (we wouldn't check its validity in
SemaChecking.cpp).
---
 clang/include/clang/Basic/Builtins.def | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index fa3d83d1a34bec0..ec39e926889b936 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1770,9 +1770,6 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
 // Arithmetic Fence: to prevent FP reordering and reassociation optimizations
 LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES)
 
-// preprocessor embed builtin
-LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES)
-
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN

>From 7c6bc7b776be54f7dca27ce34222c9ca7b1beda4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:00:02 -0500
Subject: [PATCH 19/23] Add a test for feature testing the builtin

---
 clang/test/Preprocessor/embed_builtin.cpp | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 clang/test/Preprocessor/embed_builtin.cpp

diff --git a/clang/test/Preprocessor/embed_builtin.cpp b/clang/test/Preprocessor/embed_builtin.cpp
new file mode 100644
index 000000000000000..d2547fa0c3f668d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_builtin.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if !__has_builtin(__builtin_pp_embed)
+#error "Don't have __builtin_pp_embed?"
+#endif

>From 038c90d4e9dc2c17900064b7e059061165b6d993 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:22:00 -0500
Subject: [PATCH 20/23] Correct parsing behavior and add tests

There is likely more work to be done here to split parsing and semantic
concerns. This also pointed out an issue where __builtin_pp_embed seems
to have a non-void return type, but it is unclear what value it actually
returns.
---
 .../clang/Basic/DiagnosticCommonKinds.td      |   2 +-
 clang/lib/Parse/ParseExpr.cpp                 | 118 +++++++++---------
 clang/test/Parser/embed_builtin.cpp           |  14 +++
 3 files changed, 75 insertions(+), 59 deletions(-)
 create mode 100644 clang/test/Parser/embed_builtin.cpp

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index b2e770b540944e3..6368f0ceeac3274 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -57,7 +57,7 @@ def err_expected_string_literal : Error<"expected string literal "
           "for optional message in 'availability' attribute|"
           "for %select{language name|source container name|USR}1 in "
           "'external_source_symbol' attribute|"
-          "as argument of '%1' attribute}0">;
+          "as argument of '%1' attribute|as the %ordinal1 argument}0">;
 
 def err_builtin_pp_embed_invalid_argument : Error<
   "invalid argument to '__builtin_pp_embed': %0">;
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 4c96b62b3e323bd..03d181586f83b5a 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2858,86 +2858,88 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() {
     break;
   }
   case tok::kw___builtin_pp_embed: {
-    SourceRange DataTyExprSourceRange{};
+    // __builtin_pp_embed( type-name , string-literal , string-literal )
+    SourceRange DataTyExprSourceRange;
     TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange));
 
+    if (DataTyExpr.isInvalid()) {
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+
     if (ExpectAndConsume(tok::comma)) {
       SkipUntil(tok::r_paren, StopAtSemi);
-      Res = ExprError();
+      return ExprError();
     }
 
-    ExprResult FilenameArgExpr(ParseStringLiteralExpression());
+    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+      Diag(Tok, diag::err_expected_string_literal)
+          << /*as argument*/ 5 << /*second argument*/ 2;
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+    ExprResult FilenameArgExpr(ParseUnevaluatedStringLiteralExpression());
 
-    if (ExpectAndConsume(tok::comma)) {
+    if (FilenameArgExpr.isInvalid() || ExpectAndConsume(tok::comma)) {
       SkipUntil(tok::r_paren, StopAtSemi);
-      Res = ExprError();
+      return ExprError();
     }
 
-    ExprResult Base64ArgExpr(ParseStringLiteralExpression());
+    if (!tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+      Diag(Tok, diag::err_expected_string_literal)
+          << /*as argument*/ 5 << /*third argument*/ 3;
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return ExprError();
+    }
+    ExprResult Base64ArgExpr(ParseUnevaluatedStringLiteralExpression());
 
-    if (Tok.isNot(tok::r_paren)) {
+    if (Base64ArgExpr.isInvalid() || Tok.isNot(tok::r_paren)) {
       Diag(Tok, diag::err_expected) << tok::r_paren;
-      Res = ExprError();
+      return ExprError();
     }
 
     const ASTContext &Context = Actions.getASTContext();
-    QualType DataTy = Context.UnsignedCharTy;
+    QualType DataTy = DataTyExpr.get().get().getCanonicalType();
     size_t TargetWidth = Context.getTypeSize(DataTy);
-    if (DataTyExpr.isInvalid()) {
+    if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
+        DataTy.getUnqualifiedType() != Context.CharTy) {
+      // TODO: check if it is exactly the same as unsigned char
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "only 'char' and 'unsigned char' are supported";
       Res = ExprError();
-    } else {
-      DataTy = DataTyExpr.get().get().getCanonicalType();
-      TargetWidth = Context.getTypeSize(DataTy);
-      if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy &&
-          DataTy.getUnqualifiedType() != Context.CharTy) {
-        // TODO: check if is exactly the same as unsigned char
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "only 'char' and 'unsigned char' are supported";
-        Res = ExprError();
-      }
-      if ((TargetWidth % CHAR_BIT) != 0) {
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "width of element type is not a multiple of host platform's "
-               "CHAR_BIT!";
-        Res = ExprError();
-      }
     }
-
-    StringLiteral *FilenameLiteral = nullptr;
-    if (FilenameArgExpr.isInvalid()) {
+    if ((TargetWidth % CHAR_BIT) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "width of element type is not a multiple of host platform's "
+              "CHAR_BIT!";
       Res = ExprError();
-    } else {
-      FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
     }
 
-    std::vector<char> BinaryData{};
-    if (Base64ArgExpr.isInvalid()) {
+    StringLiteral *FilenameLiteral = FilenameArgExpr.getAs<StringLiteral>();
+    std::vector<char> BinaryData;
+    StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
+    StringRef Base64StrData = Base64Str->getBytes();
+    if (Base64Str->getKind() != StringLiteralKind::Unevaluated) {
+      Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
+          << 0
+          << "'__builtin_pp_embed' with valid base64 encoding that is an "
+              "ordinary \"...\" string";
+    }
+    const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
+      Diag(Base64Str->getExprLoc(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "expected a valid base64 encoded string";
+    };
+    llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
+    llvm::handleAllErrors(std::move(Err), OnDecodeError);
+    if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
+      Diag(DataTyExprSourceRange.getBegin(),
+            diag::err_builtin_pp_embed_invalid_argument)
+          << "size of data does not split evently into the number of bytes "
+              "requested";
       Res = ExprError();
-    } else {
-      StringLiteral *Base64Str = Base64ArgExpr.getAs<StringLiteral>();
-      StringRef Base64StrData = Base64Str->getBytes();
-      if (Base64Str->getKind() != StringLiteralKind::Ordinary) {
-        Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal)
-            << 0
-            << "'__builtin_pp_embed' with valid base64 encoding that is an "
-               "ordinary \"...\" string";
-      }
-      const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) {
-        Diag(Base64Str->getExprLoc(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "expected a valid base64 encoded string";
-      };
-      llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData);
-      llvm::handleAllErrors(std::move(Err), OnDecodeError);
-      if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) {
-        Diag(DataTyExprSourceRange.getBegin(),
-             diag::err_builtin_pp_embed_invalid_argument)
-            << "size of data does not split evently into the number of bytes "
-               "requested";
-        Res = ExprError();
-      }
     }
 
     if (!Res.isInvalid()) {
diff --git a/clang/test/Parser/embed_builtin.cpp b/clang/test/Parser/embed_builtin.cpp
new file mode 100644
index 000000000000000..487c11c393ad0ee
--- /dev/null
+++ b/clang/test/Parser/embed_builtin.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -verify
+
+void parsing_diags() {
+  __builtin_pp_embed;                   // expected-error {{expected '(' after '__builtin_pp_embed'}}
+  __builtin_pp_embed(;                  // expected-error {{expected a type}}
+  __builtin_pp_embed();                 // expected-error {{expected a type}}
+  __builtin_pp_embed(12);               // expected-error {{expected a type}}
+  __builtin_pp_embed(int);              // expected-error {{expected ','}}
+  __builtin_pp_embed(int, 12);          // expected-error {{expected string literal as the 2nd argument}}
+  __builtin_pp_embed(int, "", 12);      // expected-error {{expected string literal as the 3rd argument}}
+  __builtin_pp_embed(int, "", "", 12);  // expected-error {{expected ')'}}
+  (void)__builtin_pp_embed(char, L"", "");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+  (void)__builtin_pp_embed(char, "", L"");    // expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}}
+}

>From c204b7358f2fcd495d495831ea71baa67f693711 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 8 Nov 2023 12:40:23 -0500
Subject: [PATCH 21/23] No longer expose the embed driver options to Flang

The options don't make sense outside of Clang currently.
---
 clang/include/clang/Driver/Options.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index aef200cc5729279..91c6ff70cad7236 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -832,11 +832,11 @@ def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
 def embed_dir : JoinedOrSeparate<["-"], "embed-dir">,
     Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    Visibility<[ClangOption, CC1Option]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">,
     Flags<[RenderJoined]>, Group<EmbedPath_Group>,
-    Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>,
+    Visibility<[ClangOption, CC1Option]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;

>From ec01bec24f4c71f3bd50ae717490db628cd1dde8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 07:32:29 -0500
Subject: [PATCH 22/23] Fix type mismatch that was upsetting the precommit CI
 bot

---
 clang/lib/Lex/PPDirectives.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 2902d5da7bc5cf5..9d5d6dcdb7a8c2a 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1386,7 +1386,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleEmbedDirective(SavedHash.getLocation(), Result,
                                   getCurrentFileLexer()
                                       ? getCurrentFileLexer()->getFileEntry()
-                                      : nullptr);
+                                      : static_cast<FileEntry *>(nullptr));
     case tok::pp_assert:
       //isExtension = true;  // FIXME: implement #assert
       break;

>From f57334a078a20da3da4e327dbceb3dc83ad3a2fc Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Thu, 9 Nov 2023 08:11:00 -0500
Subject: [PATCH 23/23] Fix misuse of Twine and add a test

The issue would previously manifest in -E output where we would print:
  1>
instead of:
  <built-in:embed:1>
---
 clang/lib/Lex/PPDirectives.cpp                 | 18 ++++++------------
 .../Preprocessor/embed_preprocess_to_file.c    | 13 +++++++++++++
 2 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Preprocessor/embed_preprocess_to_file.c

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9d5d6dcdb7a8c2a..695fca9f5157aaa 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3899,11 +3899,6 @@ void Preprocessor::HandleEmbedDirectiveNaive(
   // particular.
   EmbedBuffers.push_back("");
   size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
-  llvm::Twine EmbedBufferName = [](const std::string &Number) {
-    llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
-    return PrefixNumber.concat(">");
-  }(EmbedBufferNumberVal);
   std::string &TargetEmbedBuffer = EmbedBuffers.back();
   const size_t TotalSize = BinaryContents.size();
   // In the future, this might change/improve.
@@ -3956,7 +3951,9 @@ void Preprocessor::HandleEmbedDirectiveNaive(
 
   // Create faux-file and its ID, backed by a memory buffer.
   std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+      llvm::MemoryBuffer::getMemBufferCopy(
+          TargetEmbedBuffer,
+          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
   assert(EmbedMemBuffer && "Cannot create predefined source buffer");
   FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
   assert(EmbedBufferFID.isValid() &&
@@ -4113,11 +4110,6 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
   // particular.
   EmbedBuffers.push_back("");
   size_t EmbedBufferNumber = EmbedBuffers.size();
-  std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber);
-  llvm::Twine EmbedBufferName = [](const std::string &Number) {
-    llvm::Twine PrefixNumber = ("<built-in:embed:", Number);
-    return PrefixNumber.concat(">");
-  }(EmbedBufferNumberVal);
   std::string &TargetEmbedBuffer = EmbedBuffers.back();
   StringRef TypeName = "unsigned char";
   const size_t TotalSize =
@@ -4147,7 +4139,9 @@ void Preprocessor::HandleEmbedDirectiveBuiltin(
   TargetEmbedBuffer.append("\")");
   // Create faux-file and its ID, backed by a memory buffer.
   std::unique_ptr<llvm::MemoryBuffer> EmbedMemBuffer =
-      llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName);
+      llvm::MemoryBuffer::getMemBufferCopy(
+          TargetEmbedBuffer,
+          "<built-in:embed:" + Twine(EmbedBufferNumber) + ">");
   assert(EmbedMemBuffer && "Cannot create predefined source buffer");
   FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer));
   assert(EmbedBufferFID.isValid() &&
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 000000000000000..96447d4d6b11f7e
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -std=c23 -E -embed-dir=%S/Inputs | FileCheck %s
+
+// Ensure that we print out the correct data to the preprocessed file. Note,
+// #embed will do a base64 encoding of the file contents, so if art.txt changes,
+// this test will need to change accordingly as well.
+const char data[] = {
+#embed <media/art.txt>
+};
+
+// CHECK: # 1 "<built-in:embed:1>" 1
+// CHECK-NEXT: __builtin_pp_embed(unsigned char,"{{.*}}media{{\\|/}}art.txt","ICAgICAgICAgICBfXyAgXwogICAgICAgLi0uJyAgYDsgYC0uXyAgX18gIF8KICAgICAgKF8sICAgICAgICAgLi06JyAgYDsgYC0uXwogICAgLCdvIiggICAgICAgIChfLCAgICAgICAgICAgKQogICAoX18sLScgICAgICAsJ28iKCAgICAgICAgICAgICk+CiAgICAgICggICAgICAgKF9fLC0nICAgICAgICAgICAgKQogICAgICAgYC0nLl8uLS0uXyggICAgICAgICAgICAgKQogICAgICAgICAgfHx8ICB8fHxgLScuXy4tLS5fLi0nCiAgICAgICAgICAgICAgICAgICAgIHx8fCAgfHx8Cg==")
+// CHECK-NEXT: # 8 "{{.*}}embed_preprocess_to_file.c" 2
+};



More information about the cfe-commits mailing list