[clang] [clang-tools-extra] Reland [clang][Sema, Lex, Parse] Preprocessor embed in C and C++ (PR #95802)

Mariya Podchishchaeva via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 18 05:05:52 PDT 2024


https://github.com/Fznamznon updated https://github.com/llvm/llvm-project/pull/95802

>From d8af444f6ab7816d1a2bdefbc20c4265edf12f50 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Mon, 17 Jun 2024 06:59:21 -0700
Subject: [PATCH 1/7] Reland [clang][Sema, Lex, Parse] Preprocessor embed in C
 and C++

This commit implements the entirety of the now-accepted [N3017 -
Preprocessor
Embed](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm) and
its sister C++ paper [p1967](https://wg21.link/p1967). It implements
everything in the specification, and includes an implementation that
drastically improves the time it takes to embed data in specific
scenarios (the initialization of character type arrays). The mechanisms
used to do this are applied under the "as-if" rule. In general, when the
system cannot detect that it is initializing an array object in a variable
declaration, it will either generate an EmbedExpr AST node, which is
expanded by AST consumers (CodeGen or constant expression evaluators), or
expand the embed directive as a comma expression.

This reverts commit 682d461d5a231cee54d65910e6341769419a67d7.

---------

Co-authored-by: The Phantom Derpstorm <phdofthehouse at gmail.com>
Co-authored-by: Aaron Ballman <aaron at aaronballman.com>
Co-authored-by: cor3ntin <corentinjabot at gmail.com>
Co-authored-by: H. Vetinari <h.vetinari at gmx.com>
---
 .../test/pp-trace/pp-trace-macro.cpp          |   9 +
 clang/docs/LanguageExtensions.rst             |  24 +
 clang/include/clang/AST/Expr.h                | 160 ++++++
 clang/include/clang/AST/RecursiveASTVisitor.h |   5 +
 clang/include/clang/AST/TextNodeDumper.h      |   1 +
 .../clang/Basic/DiagnosticCommonKinds.td      |   3 +
 .../include/clang/Basic/DiagnosticLexKinds.td |  12 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 -
 clang/include/clang/Basic/FileManager.h       |  11 +-
 clang/include/clang/Basic/StmtNodes.td        |   1 +
 clang/include/clang/Basic/TokenKinds.def      |   6 +
 clang/include/clang/Driver/Options.td         |   6 +
 .../Frontend/PreprocessorOutputOptions.h      |   3 +
 clang/include/clang/Lex/PPCallbacks.h         |  54 ++
 .../include/clang/Lex/PPDirectiveParameter.h  |  33 ++
 clang/include/clang/Lex/PPEmbedParameters.h   |  94 ++++
 clang/include/clang/Lex/Preprocessor.h        |  71 ++-
 clang/include/clang/Lex/PreprocessorOptions.h |   3 +
 clang/include/clang/Parse/Parser.h            |   3 +
 clang/include/clang/Sema/Sema.h               |   4 +
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/AST/Expr.cpp                        |  12 +
 clang/lib/AST/ExprClassification.cpp          |   5 +
 clang/lib/AST/ExprConstant.cpp                |  63 ++-
 clang/lib/AST/Interp/ByteCodeExprGen.cpp      |  21 +-
 clang/lib/AST/Interp/ByteCodeExprGen.h        |   1 +
 clang/lib/AST/ItaniumMangle.cpp               |   1 +
 clang/lib/AST/StmtPrinter.cpp                 |   4 +
 clang/lib/AST/StmtProfile.cpp                 |   2 +
 clang/lib/AST/TextNodeDumper.cpp              |   5 +
 clang/lib/Basic/FileManager.cpp               |   7 +-
 clang/lib/Basic/IdentifierTable.cpp           |   5 +-
 clang/lib/CodeGen/CGExprAgg.cpp               |  40 +-
 clang/lib/CodeGen/CGExprConstant.cpp          | 118 ++++-
 clang/lib/CodeGen/CGExprScalar.cpp            |   7 +
 clang/lib/Driver/ToolChains/Clang.cpp         |   6 +-
 clang/lib/Frontend/CompilerInvocation.cpp     |   8 +
 clang/lib/Frontend/DependencyFile.cpp         |  25 +
 clang/lib/Frontend/DependencyGraph.cpp        |  24 +-
 clang/lib/Frontend/InitPreprocessor.cpp       |   8 +
 .../lib/Frontend/PrintPreprocessedOutput.cpp  | 122 ++++-
 clang/lib/Lex/PPDirectives.cpp                | 477 +++++++++++++++++-
 clang/lib/Lex/PPExpressions.cpp               |  49 +-
 clang/lib/Lex/PPMacroExpansion.cpp            | 111 ++++
 clang/lib/Lex/TokenConcatenation.cpp          |   5 +-
 clang/lib/Parse/ParseExpr.cpp                 |  37 +-
 clang/lib/Parse/ParseInit.cpp                 |  32 ++
 clang/lib/Parse/ParseTemplate.cpp             |  41 +-
 clang/lib/Sema/SemaExceptionSpec.cpp          |   1 +
 clang/lib/Sema/SemaExpr.cpp                   |  17 +-
 clang/lib/Sema/SemaInit.cpp                   | 113 ++++-
 clang/lib/Sema/TreeTransform.h                |   5 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |  15 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |  11 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |   4 +
 clang/test/C/C2x/Inputs/bits.bin              |   1 +
 clang/test/C/C2x/Inputs/boop.h                |   1 +
 clang/test/C/C2x/Inputs/i.dat                 |   1 +
 clang/test/C/C2x/Inputs/jump.wav              |   1 +
 clang/test/C/C2x/Inputs/s.dat                 |   1 +
 clang/test/C/C2x/n3017.c                      | 216 ++++++++
 clang/test/Preprocessor/Inputs/jk.txt         |   1 +
 clang/test/Preprocessor/Inputs/media/art.txt  |   9 +
 clang/test/Preprocessor/Inputs/media/empty    |   0
 clang/test/Preprocessor/Inputs/null_byte.bin  | Bin 0 -> 1 bytes
 clang/test/Preprocessor/Inputs/numbers.txt    |   1 +
 .../test/Preprocessor/Inputs/single_byte.txt  |   1 +
 clang/test/Preprocessor/embed___has_embed.c   |  60 +++
 .../embed___has_embed_parsing_errors.c        | 240 +++++++++
 .../embed___has_embed_supported.c             |  24 +
 clang/test/Preprocessor/embed_art.c           | 104 ++++
 clang/test/Preprocessor/embed_codegen.cpp     |  84 +++
 clang/test/Preprocessor/embed_constexpr.cpp   |  97 ++++
 clang/test/Preprocessor/embed_dependencies.c  |  20 +
 .../Preprocessor/embed_ext_compat_diags.c     |  16 +
 .../test/Preprocessor/embed_feature_test.cpp  |   7 +
 .../embed_file_not_found_chevron.c            |   4 +
 .../Preprocessor/embed_file_not_found_quote.c |   4 +
 clang/test/Preprocessor/embed_init.c          |  29 ++
 .../Preprocessor/embed_parameter_if_empty.c   |  24 +
 .../test/Preprocessor/embed_parameter_limit.c |  94 ++++
 .../Preprocessor/embed_parameter_offset.c     |  89 ++++
 .../Preprocessor/embed_parameter_prefix.c     |  38 ++
 .../Preprocessor/embed_parameter_suffix.c     |  39 ++
 .../embed_parameter_unrecognized.c            |   9 +
 .../test/Preprocessor/embed_parsing_errors.c  | 130 +++++
 clang/test/Preprocessor/embed_path_chevron.c  |   8 +
 clang/test/Preprocessor/embed_path_quote.c    |   8 +
 .../Preprocessor/embed_preprocess_to_file.c   |  39 ++
 clang/test/Preprocessor/embed_single_entity.c |   7 +
 clang/test/Preprocessor/embed_weird.cpp       |  98 ++++
 clang/test/Preprocessor/init-aarch64.c        |   3 +
 clang/test/Preprocessor/init.c                |   3 +
 clang/test/Preprocessor/single_byte.txt       |   1 +
 clang/tools/libclang/CXCursor.cpp             |   1 +
 clang/www/c_status.html                       |   2 +-
 96 files changed, 3318 insertions(+), 107 deletions(-)
 create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h
 create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h
 create mode 100644 clang/test/C/C2x/Inputs/bits.bin
 create mode 100644 clang/test/C/C2x/Inputs/boop.h
 create mode 100644 clang/test/C/C2x/Inputs/i.dat
 create mode 100644 clang/test/C/C2x/Inputs/jump.wav
 create mode 100644 clang/test/C/C2x/Inputs/s.dat
 create mode 100644 clang/test/C/C2x/n3017.c
 create mode 100644 clang/test/Preprocessor/Inputs/jk.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt
 create mode 100644 clang/test/Preprocessor/Inputs/media/empty
 create mode 100644 clang/test/Preprocessor/Inputs/null_byte.bin
 create mode 100644 clang/test/Preprocessor/Inputs/numbers.txt
 create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt
 create mode 100644 clang/test/Preprocessor/embed___has_embed.c
 create mode 100644 clang/test/Preprocessor/embed___has_embed_parsing_errors.c
 create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c
 create mode 100644 clang/test/Preprocessor/embed_art.c
 create mode 100644 clang/test/Preprocessor/embed_codegen.cpp
 create mode 100644 clang/test/Preprocessor/embed_constexpr.cpp
 create mode 100644 clang/test/Preprocessor/embed_dependencies.c
 create mode 100644 clang/test/Preprocessor/embed_ext_compat_diags.c
 create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp
 create mode 100644 clang/test/Preprocessor/embed_file_not_found_chevron.c
 create mode 100644 clang/test/Preprocessor/embed_file_not_found_quote.c
 create mode 100644 clang/test/Preprocessor/embed_init.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c
 create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c
 create mode 100644 clang/test/Preprocessor/embed_parsing_errors.c
 create mode 100644 clang/test/Preprocessor/embed_path_chevron.c
 create mode 100644 clang/test/Preprocessor/embed_path_quote.c
 create mode 100644 clang/test/Preprocessor/embed_preprocess_to_file.c
 create mode 100644 clang/test/Preprocessor/embed_single_entity.c
 create mode 100644 clang/test/Preprocessor/embed_weird.cpp
 create mode 100644 clang/test/Preprocessor/single_byte.txt

diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
index 1d85607e86b7f..7c2a231101070 100644
--- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
+++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
@@ -31,6 +31,15 @@ X
 // CHECK:        MacroNameTok: __STDC_UTF_32__
 // CHECK-NEXT:   MacroDirective: MD_Define
 // CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_NOT_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_FOUND__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
+// CHECK-NEXT:   MacroNameTok: __STDC_EMBED_EMPTY__
+// CHECK-NEXT:   MacroDirective: MD_Define
+// CHECK:      - Callback: MacroDefined
 // CHECK:      - Callback: MacroDefined
 // CHECK-NEXT:   MacroNameTok: MACRO
 // CHECK-NEXT:   MacroDirective: MD_Define
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 92e6025c95a8c..9830b35faae12 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1502,6 +1502,7 @@ Attributes on Structured Bindings            __cpp_structured_bindings        C+
 Designated initializers (N494)                                                C99           C89
 Array & element qualification (N2607)                                         C23           C89
 Attributes (N2335)                                                            C23           C89
+``#embed`` (N3017)                                                            C23           C89, C++
 ============================================ ================================ ============= =============
 
 Type Trait Primitives
@@ -5664,3 +5665,26 @@ Compiling different TUs depending on these flags (including use of
 ``std::hardware_destructive_interference``)  with different compilers, macro
 definitions, or architecture flags will lead to ODR violations and should be
 avoided.
+
+``#embed`` Parameters
+=====================
+
+``clang::offset``
+-----------------
+The ``clang::offset`` embed parameter may appear zero or one time in the
+embed parameter sequence. Its preprocessor argument clause shall be present and
+have the form:
+
+.. code-block:: text
+
+  ( constant-expression )
+
+and shall be an integer constant expression. The integer constant expression
+shall not evaluate to a value less than 0. The token ``defined`` shall not
+appear within the constant expression.
+
+The offset will be used when reading the contents of the embedded resource to
+specify the starting offset to begin embedding from. The resource is treated
+as being empty if the specified offset is larger than the number of bytes in
+the resource. The offset will be applied *before* any ``limit`` parameters are
+applied.
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index f2bf667636dc9..352e4467ed9dd 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4799,6 +4799,166 @@ class SourceLocExpr final : public Expr {
   friend class ASTStmtReader;
 };
 
+/// Stores data related to a single #embed directive.
+struct EmbedDataStorage {
+  StringLiteral *Filename;
+  StringLiteral *BinaryData;
+  size_t getDataElementCount() const { return BinaryData->getByteLength(); }
+};
+
+/// Represents a reference to #embed data. By default, this references the whole
+/// range. Otherwise it represents a subrange of data imported by #embed
+/// directive. Needed to handle nested initializer lists with #embed directives.
+/// Example:
+///  struct S {
+///    int x, y;
+///  };
+///
+///  struct T {
+///    int x[2];
+///    struct S s;
+///  };
+///
+///  struct T t[] = {
+///  #embed "data" // data contains 10 elements;
+///  };
+///
+/// The resulting semantic form of initializer list will contain (EE stands
+/// for EmbedExpr):
+///  { {EE(first two data elements), {EE(3rd element), EE(4th element) }},
+///    {EE(5th and 6th element), {EE(7th element), EE(8th element) }},
+///    {EE(9th and 10th element), { zeroinitializer }}}
+///
+/// EmbedExpr inside of a semantic initializer list and referencing more than
+/// one element can only appear for arrays of scalars.
+class EmbedExpr final : public Expr {
+  SourceLocation EmbedKeywordLoc;
+  IntegerLiteral *FakeChildNode = nullptr;
+  const ASTContext *Ctx = nullptr;
+  EmbedDataStorage *Data;
+  unsigned Begin = 0;
+  unsigned NumOfElements;
+
+public:
+  EmbedExpr(const ASTContext &Ctx, SourceLocation Loc, EmbedDataStorage *Data,
+            unsigned Begin, unsigned NumOfElements);
+  explicit EmbedExpr(EmptyShell Empty) : Expr(EmbedExprClass, Empty) {}
+
+  SourceLocation getLocation() const { return EmbedKeywordLoc; }
+  SourceLocation getBeginLoc() const { return EmbedKeywordLoc; }
+  SourceLocation getEndLoc() const { return EmbedKeywordLoc; }
+
+  StringLiteral *getFilenameStringLiteral() const { return Data->Filename; }
+  StringLiteral *getDataStringLiteral() const { return Data->BinaryData; }
+  EmbedDataStorage *getData() const { return Data; }
+
+  unsigned getStartingElementPos() const { return Begin; }
+  size_t getDataElementCount() const { return NumOfElements; }
+
+  // Allows accessing every byte of EmbedExpr data and iterating over it.
+  // An Iterator knows the EmbedExpr that it refers to, and an offset value
+  // within the data.
+  // Dereferencing an Iterator results in construction of IntegerLiteral AST
+  // node filled with byte of data of the corresponding EmbedExpr within offset
+  // that the Iterator currently has.
+  template <bool Const>
+  class ChildElementIter
+      : public llvm::iterator_facade_base<
+            ChildElementIter<Const>, std::random_access_iterator_tag,
+            std::conditional_t<Const, const IntegerLiteral *,
+                               IntegerLiteral *>> {
+    friend class EmbedExpr;
+
+    EmbedExpr *EExpr = nullptr;
+    unsigned long long CurOffset = ULLONG_MAX;
+    using BaseTy = typename ChildElementIter::iterator_facade_base;
+
+    ChildElementIter(EmbedExpr *E) : EExpr(E) {
+      if (E)
+        CurOffset = E->getStartingElementPos();
+    }
+
+  public:
+    ChildElementIter() : CurOffset(ULLONG_MAX) {}
+    typename BaseTy::reference operator*() const {
+      assert(EExpr && CurOffset != ULLONG_MAX &&
+             "trying to dereference an invalid iterator");
+      IntegerLiteral *N = EExpr->FakeChildNode;
+      StringRef DataRef = EExpr->Data->BinaryData->getBytes();
+      N->setValue(*EExpr->Ctx,
+                  llvm::APInt(N->getValue().getBitWidth(), DataRef[CurOffset],
+                              N->getType()->isSignedIntegerType()));
+      // We want to return a reference to the fake child node in the
+      // EmbedExpr, not the local variable N.
+      return const_cast<typename BaseTy::reference>(EExpr->FakeChildNode);
+    }
+    typename BaseTy::pointer operator->() const { return **this; }
+    using BaseTy::operator++;
+    ChildElementIter &operator++() {
+      assert(EExpr && "trying to increment an invalid iterator");
+      assert(CurOffset != ULLONG_MAX &&
+             "Already at the end of what we can iterate over");
+      if (++CurOffset >=
+          EExpr->getDataElementCount() + EExpr->getStartingElementPos()) {
+        CurOffset = ULLONG_MAX;
+        EExpr = nullptr;
+      }
+      return *this;
+    }
+    bool operator==(ChildElementIter Other) const {
+      return (EExpr == Other.EExpr && CurOffset == Other.CurOffset);
+    }
+  }; // class ChildElementIter
+
+public:
+  using fake_child_range = llvm::iterator_range<ChildElementIter<false>>;
+  using const_fake_child_range = llvm::iterator_range<ChildElementIter<true>>;
+
+  fake_child_range underlying_data_elements() {
+    return fake_child_range(ChildElementIter<false>(this),
+                            ChildElementIter<false>());
+  }
+
+  const_fake_child_range underlying_data_elements() const {
+    return const_fake_child_range(
+        ChildElementIter<true>(const_cast<EmbedExpr *>(this)),
+        ChildElementIter<true>());
+  }
+
+  child_range children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+
+  const_child_range children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == EmbedExprClass;
+  }
+
+  ChildElementIter<false> begin() { return ChildElementIter<false>(this); }
+
+  ChildElementIter<true> begin() const {
+    return ChildElementIter<true>(const_cast<EmbedExpr *>(this));
+  }
+
+  template <typename Call, typename... Targs>
+  bool doForEachDataElement(Call &&C, unsigned &StartingIndexInArray,
+                            Targs &&...Fargs) const {
+    for (auto It : underlying_data_elements()) {
+      if (!std::invoke(std::forward<Call>(C), const_cast<IntegerLiteral *>(It),
+                       StartingIndexInArray, std::forward<Targs>(Fargs)...))
+        return false;
+      StartingIndexInArray++;
+    }
+    return true;
+  }
+
+private:
+  friend class ASTStmtReader;
+};
+
 /// Describes an C or C++ initializer list.
 ///
 /// InitListExpr describes an initializer list, which can be used to
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index aa55e2e7e8718..2785afd59bf21 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2864,6 +2864,11 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
 DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
 DEF_TRAVERSE_STMT(StmtExpr, {})
 DEF_TRAVERSE_STMT(SourceLocExpr, {})
+DEF_TRAVERSE_STMT(EmbedExpr, {
+  for (IntegerLiteral *IL : S->underlying_data_elements()) {
+    TRY_TO_TRAVERSE_OR_ENQUEUE_STMT(IL);
+  }
+})
 
 DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
   TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index abfafcaef271b..39dd1f515c9eb 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -409,6 +409,7 @@ class TextNodeDumper
   void VisitHLSLBufferDecl(const HLSLBufferDecl *D);
   void VisitOpenACCConstructStmt(const OpenACCConstructStmt *S);
   void VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S);
+  void VisitEmbedExpr(const EmbedExpr *S);
 };
 
 } // namespace clang
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 1e44bc4ad09b6..de758cbe679dc 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -275,6 +275,9 @@ def err_too_large_for_fixed_point : Error<
 def err_unimplemented_conversion_with_fixed_point_type : Error<
   "conversion between fixed point and %0 is not yet supported">;
 
+def err_requires_positive_value : Error<
+  "%select{invalid value '%0'; must be positive|value '%0' is too large}1">;
+
 // SEH
 def err_seh_expected_handler : Error<
   "expected '__except' or '__finally' block">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 25fbfe83fa2bc..12d7b8c0205ee 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -436,6 +436,14 @@ def warn_cxx23_compat_warning_directive : Warning<
 def warn_c23_compat_warning_directive : Warning<
   "#warning is incompatible with C standards before C23">,
   InGroup<CPre23Compat>, DefaultIgnore;
+def ext_pp_embed_directive : ExtWarn<
+  "#embed is a %select{C23|Clang}0 extension">,
+  InGroup<C23>;
+def warn_compat_pp_embed_directive : Warning<
+  "#embed is incompatible with C standards before C23">,
+  InGroup<CPre23Compat>, DefaultIgnore;
+def err_pp_embed_dup_params : Error<
+  "cannot specify parameter '%0' twice in the same '#embed' directive">;
 
 def ext_pp_extra_tokens_at_eol : ExtWarn<
   "extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
@@ -505,6 +513,8 @@ def err_pp_invalid_directive : Error<
   "invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
 def warn_pp_invalid_directive : Warning<
   err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
+def err_pp_unknown_parameter : Error<
+  "unknown%select{| embed}0 preprocessor parameter '%1'">;
 def err_pp_directive_required : Error<
   "%0 must be used within a preprocessing directive">;
 def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal;
@@ -719,6 +729,8 @@ def err_pp_module_build_missing_end : Error<
   "no matching '#pragma clang module endbuild' for this '#pragma clang module build'">;
 
 def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">;
+def err_defined_in_pp_embed : Error<
+  "'defined' cannot appear within this context">;
 def err_paste_at_start : Error<
   "'##' cannot appear at start of macro expansion">;
 def err_paste_at_end : Error<"'##' cannot appear at end of macro expansion">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9b8f5b7e80e7e..833e8b51c0257 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1097,8 +1097,6 @@ def note_surrounding_namespace_starts_here : Note<
   "surrounding namespace with visibility attribute starts here">;
 def err_pragma_loop_invalid_argument_type : Error<
   "invalid argument of type %0; expected an integer type">;
-def err_pragma_loop_invalid_argument_value : Error<
-  "%select{invalid value '%0'; must be positive|value '%0' is too large}1">;
 def err_pragma_loop_compatibility : Error<
   "%select{incompatible|duplicate}0 directives '%1' and '%2'">;
 def err_pragma_loop_precedes_nonloop : Error<
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index e1f33d57a8980..527bbef24793e 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -286,12 +286,15 @@ class FileManager : public RefCountedBase<FileManager> {
   /// MemoryBuffer if successful, otherwise returning null.
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(FileEntryRef Entry, bool isVolatile = false,
-                   bool RequiresNullTerminator = true);
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt);
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(StringRef Filename, bool isVolatile = false,
-                   bool RequiresNullTerminator = true) const {
-    return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
-                                RequiresNullTerminator);
+                   bool RequiresNullTerminator = true,
+                   std::optional<int64_t> MaybeLimit = std::nullopt) const {
+    return getBufferForFileImpl(Filename,
+                                /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1),
+                                isVolatile, RequiresNullTerminator);
   }
 
 private:
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 6ca08abdb14f0..c59a17be7808f 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -204,6 +204,7 @@ def OpaqueValueExpr : StmtNode<Expr>;
 def TypoExpr : StmtNode<Expr>;
 def RecoveryExpr : StmtNode<Expr>;
 def BuiltinBitCastExpr : StmtNode<ExplicitCastExpr>;
+def EmbedExpr : StmtNode<Expr>;
 
 // Microsoft Extensions.
 def MSPropertyRefExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 9c4b17465e18a..37d570ca5e75b 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -126,6 +126,9 @@ PPKEYWORD(error)
 // C99 6.10.6 - Pragma Directive.
 PPKEYWORD(pragma)
 
+// C23 & C++26 #embed
+PPKEYWORD(embed)
+
 // GNU Extensions.
 PPKEYWORD(import)
 PPKEYWORD(include_next)
@@ -999,6 +1002,9 @@ ANNOTATION(header_unit)
 // Annotation for end of input in clang-repl.
 ANNOTATION(repl_input_end)
 
+// Annotation for #embed
+ANNOTATION(embed)
+
 #undef PRAGMA_ANNOTATION
 #undef ANNOTATION
 #undef TESTING_KEYWORD
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 15f62c5c1a6ab..0c04d272c1ac7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -880,6 +880,9 @@ will be ignored}]>;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
     Visibility<[ClangOption, FlangOption]>,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
+def embed_dir_EQ : Joined<["--"], "embed-dir=">, Group<Preprocessor_Group>,
+    Visibility<[ClangOption, CC1Option]>, MetaVarName<"<dir>">,
+    HelpText<"Add directory to embed search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
     HelpText<"Write a depfile containing user and system headers">;
 def MMD : Flag<["-"], "MMD">, Group<M_Group>,
@@ -1473,6 +1476,9 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
 def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Print include directives in -E mode in addition to normal output">,
   MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
+def dE : Flag<["-"], "dE">, Group<d_Group>, Visibility<[CC1Option]>,
+  HelpText<"Print embed directives in -E mode in addition to normal output">,
+  MarshallingInfoFlag<PreprocessorOutputOpts<"ShowEmbedDirectives">>;
 def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   HelpText<"Print macro definitions in -E mode instead of normal output">;
 def dead__strip : Flag<["-"], "dead_strip">;
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 6e19cae33cf28..654cf22f010f7 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -32,6 +32,8 @@ class PreprocessorOutputOptions {
   LLVM_PREFERRED_TYPE(bool)
   unsigned ShowIncludeDirectives : 1;  ///< Print includes, imports etc. within preprocessed output.
   LLVM_PREFERRED_TYPE(bool)
+  unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output.
+  LLVM_PREFERRED_TYPE(bool)
   unsigned RewriteIncludes : 1;    ///< Preprocess include directives only.
   LLVM_PREFERRED_TYPE(bool)
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
@@ -51,6 +53,7 @@ class PreprocessorOutputOptions {
     ShowMacroComments = 0;
     ShowMacros = 0;
     ShowIncludeDirectives = 0;
+    ShowEmbedDirectives = 0;
     RewriteIncludes = 0;
     RewriteImports = 0;
     MinimizeWhitespace = 0;
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index dfc74b52686f1..46cc564086f1c 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -27,6 +27,7 @@ class IdentifierInfo;
 class MacroDefinition;
 class MacroDirective;
 class MacroArgs;
+struct LexEmbedParametersResult;
 
 /// This interface provides a way to observe the actions of the
 /// preprocessor as it does its thing.
@@ -83,6 +84,34 @@ class PPCallbacks {
                            const Token &FilenameTok,
                            SrcMgr::CharacteristicKind FileType) {}
 
+  /// Callback invoked whenever the preprocessor cannot find a file for an
+  /// embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \returns true to indicate that the preprocessor should skip this file
+  /// and not issue any diagnostic.
+  virtual bool EmbedFileNotFound(StringRef FileName) { return false; }
+
+  /// Callback invoked whenever an embed directive has been processed,
+  /// regardless of whether the embed will actually find a file.
+  ///
+  /// \param HashLoc The location of the '#' that starts the embed directive.
+  ///
+  /// \param FileName The name of the file being embedded, as written in the
+  /// source code.
+  ///
+  /// \param IsAngled Whether the file name was enclosed in angle brackets;
+  /// otherwise, it was enclosed in quotes.
+  ///
+  /// \param File The actual file that may be embedded by this embed directive.
+  ///
+  /// \param Params The parameters used by the directive.
+  virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+                              bool IsAngled, OptionalFileEntryRef File,
+                              const LexEmbedParametersResult &Params) {}
+
   /// Callback invoked whenever the preprocessor cannot find a file for an
   /// inclusion directive.
   ///
@@ -333,6 +362,10 @@ class PPCallbacks {
                        SourceRange Range) {
   }
 
+  /// Hook called when a '__has_embed' directive is read.
+  virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                        OptionalFileEntryRef File) {}
+
   /// Hook called when a '__has_include' or '__has_include_next' directive is
   /// read.
   virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
@@ -464,6 +497,21 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->FileSkipped(SkippedFile, FilenameTok, FileType);
   }
 
+  bool EmbedFileNotFound(StringRef FileName) override {
+    bool Skip = First->EmbedFileNotFound(FileName);
+    // Always forward to the second callback as well, even when the first one
+    // already asked to skip the file; the result is true if either asked.
+    Skip |= Second->EmbedFileNotFound(FileName);
+    return Skip;
+  }
+
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override {
+    First->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
+    Second->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
+  }
+
   bool FileNotFound(StringRef FileName) override {
     bool Skip = First->FileNotFound(FileName);
     // Make sure to invoke the second callback, no matter if the first already
@@ -565,6 +613,12 @@ class PPChainedCallbacks : public PPCallbacks {
     Second->PragmaDiagnostic(Loc, Namespace, mapping, Str);
   }
 
+  void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+                OptionalFileEntryRef File) override {
+    First->HasEmbed(Loc, FileName, IsAngled, File);
+    Second->HasEmbed(Loc, FileName, IsAngled, File);
+  }
+
   void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
                   OptionalFileEntryRef File,
                   SrcMgr::CharacteristicKind FileType) override;
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h
new file mode 100644
index 0000000000000..83f0566d739b1
--- /dev/null
+++ b/clang/include/clang/Lex/PPDirectiveParameter.h
@@ -0,0 +1,33 @@
+//===--- PPDirectiveParameter.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the base class for preprocessor directive parameters, such
+// as limit(1) or suffix(x) for #embed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+
+/// Captures basic information about a preprocessor directive parameter.
+class PPDirectiveParameter {
+  SourceRange R;
+
+public:
+  PPDirectiveParameter(SourceRange R) : R(R) {}
+
+  SourceRange getParameterRange() const { return R; }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
new file mode 100644
index 0000000000000..51bf908524e7a
--- /dev/null
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -0,0 +1,94 @@
+//===--- PPEmbedParameters.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines all of the preprocessor directive parameters for #embed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+
+#include "clang/Lex/PPDirectiveParameter.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+/// Preprocessor extension embed parameter "clang::offset"
+/// `clang::offset( constant-expression )`
+class PPEmbedParameterOffset : public PPDirectiveParameter {
+public:
+  size_t Offset;
+
+  PPEmbedParameterOffset(size_t Offset, SourceRange R)
+      : PPDirectiveParameter(R), Offset(Offset) {}
+};
+
+/// Preprocessor standard embed parameter "limit"
+/// `limit( constant-expression )`
+class PPEmbedParameterLimit : public PPDirectiveParameter {
+public:
+  size_t Limit;
+
+  PPEmbedParameterLimit(size_t Limit, SourceRange R)
+      : PPDirectiveParameter(R), Limit(Limit) {}
+};
+
+/// Preprocessor standard embed parameter "prefix"
+/// `prefix( balanced-token-seq )`
+class PPEmbedParameterPrefix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens;
+
+  PPEmbedParameterPrefix(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+      : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "suffix"
+/// `suffix( balanced-token-seq )`
+class PPEmbedParameterSuffix : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens;
+
+  PPEmbedParameterSuffix(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+      : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "if_empty"
+/// `if_empty( balanced-token-seq )`
+class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
+public:
+  SmallVector<Token, 2> Tokens;
+
+  PPEmbedParameterIfEmpty(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+      : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+struct LexEmbedParametersResult {
+  std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+  std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+  std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+  std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+  std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
+  SourceRange ParamRange;
+  int UnrecognizedParams;
+
+  size_t PrefixTokenCount() const {
+    if (MaybePrefixParam)
+      return MaybePrefixParam->Tokens.size();
+    return 0;
+  }
+  size_t SuffixTokenCount() const {
+    if (MaybeSuffixParam)
+      return MaybeSuffixParam->Tokens.size();
+    return 0;
+  }
+};
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 9d8a1aae23df3..60186181c9fcd 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,8 +29,10 @@
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PPEmbedParameters.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -119,6 +121,13 @@ enum MacroUse {
   MU_Undef  = 2
 };
 
+enum class EmbedResult {
+  Invalid = -1, // Parsing error occurred.
+  NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
+  Found = 1,    // Corresponds to __STDC_EMBED_FOUND__
+  Empty = 2,    // Corresponds to __STDC_EMBED_EMPTY__
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -165,6 +174,7 @@ class Preprocessor {
   IdentifierInfo *Ident__has_builtin;              // __has_builtin
   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
   IdentifierInfo *Ident__has_attribute;            // __has_attribute
+  IdentifierInfo *Ident__has_embed;                // __has_embed
   IdentifierInfo *Ident__has_include;              // __has_include
   IdentifierInfo *Ident__has_include_next;         // __has_include_next
   IdentifierInfo *Ident__has_warning;              // __has_warning
@@ -1734,6 +1744,10 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
+  /// Lex the parameters for an #embed directive, returns nullopt on error.
+  std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
+                                                             bool ForHasEmbed);
+
   bool LexAfterModuleImport(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
@@ -2314,7 +2328,13 @@ class Preprocessor {
 
   /// Read and discard all tokens remaining on the current line until
   /// the tok::eod token is found. Returns the range of the skipped tokens.
-  SourceRange DiscardUntilEndOfDirective();
+  SourceRange DiscardUntilEndOfDirective() {
+    Token Tmp;
+    return DiscardUntilEndOfDirective(Tmp);
+  }
+
+  /// Same as above except retains the token that was found.
+  SourceRange DiscardUntilEndOfDirective(Token &Tok);
 
   /// Returns true if the preprocessor has seen a use of
   /// __DATE__ or __TIME__ in the file so far.
@@ -2419,6 +2439,18 @@ class Preprocessor {
              bool *IsFrameworkFound, bool SkipCache = false,
              bool OpenFile = true, bool CacheFailures = true);
 
+  /// Given a "Filename" or \<Filename> reference, look up the indicated embed
+  /// resource. \p isAngled indicates whether the file reference was written
+  /// with angle brackets (<>) instead of quotes (""). If \p OpenFile
+  /// is true, the file looked up is opened for reading, otherwise it only
+  /// validates that the file exists. Quoted filenames are looked up relative
+  /// to \p LookupFromFile if it is nonnull.
+  ///
+  /// Returns std::nullopt on failure.
+  OptionalFileEntryRef
+  LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+                  const FileEntry *LookupFromFile = nullptr);
+
   /// Return true if we're in the top-level file, not in a \#include.
   bool isInPrimaryFile() const;
 
@@ -2524,6 +2556,9 @@ class Preprocessor {
   /// Information about the result for evaluating an expression for a
   /// preprocessor directive.
   struct DirectiveEvalResult {
+    /// The integral value of the expression.
+    std::optional<llvm::APSInt> Value;
+
     /// Whether the expression was evaluated as true or not.
     bool Conditional;
 
@@ -2538,7 +2573,25 @@ class Preprocessor {
   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
   ///
   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
-  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  bool CheckForEoD = true);
+
+  /// Evaluate an integer constant expression that may occur after a
+  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
+  ///
+  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
+  /// \p EvaluatedDefined will contain the result of whether "defined" appeared
+  /// in the evaluated expression or not.
+  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                                  Token &Tok,
+                                                  bool &EvaluatedDefined,
+                                                  bool CheckForEoD = true);
+
+  /// Process a '__has_embed("path" [, ...])' expression.
+  ///
+  /// Returns predefined `__STDC_EMBED_*` macro values if
+  /// successful.
+  EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
 
   /// Process a '__has_include("path")' expression.
   ///
@@ -2686,6 +2739,13 @@ class Preprocessor {
       const FileEntry *LookupFromFile, StringRef &LookupFilename,
       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
+  // Binary data inclusion
+  void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+                            const FileEntry *LookupFromFile = nullptr);
+  void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
+                                StringRef ResolvedFilename,
+                                const LexEmbedParametersResult &Params,
+                                StringRef BinaryContents);
 
   // File inclusion.
   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
@@ -3002,6 +3062,13 @@ class EmptylineHandler {
   virtual void HandleEmptyline(SourceRange Range) = 0;
 };
 
+/// Helper class to shuttle information about #embed directives from the
+/// preprocessor to the parser through an annotation token.
+struct EmbedAnnotationData {
+  llvm::SmallString<32> FileName;
+  StringRef BinaryData;
+};
+
 /// Registry of pragma handlers added by plugins
 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
 
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 635971d0ce5ee..c2e3d68333024 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -170,6 +170,9 @@ class PreprocessorOptions {
   /// of the specified memory buffer (the second part of each pair).
   std::vector<std::pair<std::string, llvm::MemoryBuffer *>> RemappedFileBuffers;
 
+  /// User specified embed entries.
+  std::vector<std::string> EmbedEntries;
+
   /// Whether the compiler instance should retain (i.e., not free)
   /// the buffers associated with remapped files.
   ///
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index d054b8cf0d240..95c0655f9a214 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -2122,6 +2122,8 @@ class Parser : public CodeCompletionHandler {
     QualType PreferredBaseType;
   };
   ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo);
+  ExprResult createEmbedExpr();
+  void ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs);
 
   //===--------------------------------------------------------------------===//
   // clang Expressions
@@ -3813,6 +3815,7 @@ class Parser : public CodeCompletionHandler {
   AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS,
                                 ImplicitTypenameContext AllowImplicitTypename,
                                 bool IsClassName = false);
+  void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs);
   bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
                                  TemplateTy Template, SourceLocation OpenLoc);
   ParsedTemplateArgument ParseTemplateTemplateArgument();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 4d4579fcfd456..9bf01417186c3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5729,6 +5729,10 @@ class Sema final : public SemaBase {
                                 SourceLocation BuiltinLoc,
                                 SourceLocation RPLoc);
 
+  // #embed
+  ExprResult ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
+                            StringLiteral *Filename, StringLiteral *BinaryData);
+
   // Build a potentially resolved SourceLocExpr.
   ExprResult BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
                                 SourceLocation BuiltinLoc, SourceLocation RPLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index a4728b1c06b3f..8222c92d6506f 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1655,6 +1655,9 @@ enum StmtCode {
   /// A SourceLocExpr record.
   EXPR_SOURCE_LOC,
 
+  /// An EmbedExpr record.
+  EXPR_BUILTIN_PP_EMBED,
+
   /// A ShuffleVectorExpr record.
   EXPR_SHUFFLE_VECTOR,
 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 7e555689b64c4..04b331a911913 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2373,6 +2373,17 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
   llvm_unreachable("unhandled case");
 }
 
+EmbedExpr::EmbedExpr(const ASTContext &Ctx, SourceLocation Loc,
+                     EmbedDataStorage *Data, unsigned Begin,
+                     unsigned NumOfElements)
+    : Expr(EmbedExprClass, Ctx.UnsignedCharTy, VK_PRValue, OK_Ordinary),
+      EmbedKeywordLoc(Loc), Ctx(&Ctx), Data(Data), Begin(Begin),
+      NumOfElements(NumOfElements) {
+  setDependence(ExprDependence::None);
+  FakeChildNode = IntegerLiteral::Create(
+      Ctx, llvm::APInt::getZero(Ctx.getTypeSize(getType())), getType(), Loc);
+}
+
 InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
                            ArrayRef<Expr *> initExprs, SourceLocation rbraceloc)
     : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary),
@@ -3615,6 +3626,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case CXXUuidofExprClass:
   case OpaqueValueExprClass:
   case SourceLocExprClass:
+  case EmbedExprClass:
   case ConceptSpecializationExprClass:
   case RequiresExprClass:
   case SYCLUniqueStableNameExprClass:
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index 390000e3ed383..6482cb6d39acc 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
   case Expr::RequiresExprClass:
     return Cl::CL_PRValue;
 
+  case Expr::EmbedExprClass:
+    // Nominally, this just goes through as a PRValue until we actually expand
+    // it and check it.
+    return Cl::CL_PRValue;
+
   // Make HLSL this reference-like
   case Expr::CXXThisExprClass:
     return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7178f081d9cf3..ec16a1a72c46d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7727,6 +7727,11 @@ class ExprEvaluatorBase
     return Error(E);
   }
 
+  bool VisitEmbedExpr(const EmbedExpr *E) {
+    const auto It = E->begin();
+    return StmtVisitorTy::Visit(*It);
+  }
+
   bool VisitPredefinedExpr(const PredefinedExpr *E) {
     return StmtVisitorTy::Visit(E->getFunctionName());
   }
@@ -9145,6 +9150,11 @@ class PointerExprEvaluator
     return true;
   }
 
+  bool VisitEmbedExpr(const EmbedExpr *E) {
+    llvm_unreachable("Not yet implemented for ExprConstant.cpp");
+    return true;
+  }
+
   bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
     std::string ResultStr = E->ComputeName(Info.Ctx);
 
@@ -11249,8 +11259,17 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr(
 
   // If the initializer might depend on the array index, run it for each
   // array element.
-  if (NumEltsToInit != NumElts && MaybeElementDependentArrayFiller(ArrayFiller))
+  if (NumEltsToInit != NumElts &&
+      MaybeElementDependentArrayFiller(ArrayFiller)) {
     NumEltsToInit = NumElts;
+  } else {
+    for (auto *Init : Args) {
+      if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts()))
+        NumEltsToInit += EmbedS->getDataElementCount() - 1;
+    }
+    if (NumEltsToInit > NumElts)
+      NumEltsToInit = NumElts;
+  }
 
   LLVM_DEBUG(llvm::dbgs() << "The number of elements to initialize: "
                           << NumEltsToInit << ".\n");
@@ -11268,16 +11287,49 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr(
 
   LValue Subobject = This;
   Subobject.addArray(Info, ExprToVisit, CAT);
-  for (unsigned Index = 0; Index != NumEltsToInit; ++Index) {
-    const Expr *Init = Index < Args.size() ? Args[Index] : ArrayFiller;
-    if (!EvaluateInPlace(Result.getArrayInitializedElt(Index),
-                         Info, Subobject, Init) ||
+  auto Eval = [&](const Expr *Init, unsigned ArrayIndex) {
+    if (!EvaluateInPlace(Result.getArrayInitializedElt(ArrayIndex), Info,
+                         Subobject, Init) ||
         !HandleLValueArrayAdjustment(Info, Init, Subobject,
                                      CAT->getElementType(), 1)) {
       if (!Info.noteFailure())
         return false;
       Success = false;
     }
+    return true;
+  };
+  unsigned ArrayIndex = 0;
+  QualType DestTy = CAT->getElementType();
+  APSInt Value(Info.Ctx.getTypeSize(DestTy), DestTy->isUnsignedIntegerType());
+  for (unsigned Index = 0; Index != NumEltsToInit; ++Index) {
+    const Expr *Init = Index < Args.size() ? Args[Index] : ArrayFiller;
+    if (ArrayIndex >= NumEltsToInit)
+      break;
+    if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+      StringLiteral *SL = EmbedS->getDataStringLiteral();
+      for (unsigned I = EmbedS->getStartingElementPos(),
+                    N = EmbedS->getDataElementCount();
+           I != EmbedS->getStartingElementPos() + N; ++I) {
+        Value = SL->getCodeUnit(I);
+        if (DestTy->isIntegerType()) {
+          Result.getArrayInitializedElt(ArrayIndex) = APValue(Value);
+        } else {
+          assert(DestTy->isFloatingType() && "unexpected type");
+          const FPOptions FPO =
+              Init->getFPFeaturesInEffect(Info.Ctx.getLangOpts());
+          APFloat FValue(0.0);
+          if (!HandleIntToFloatCast(Info, Init, FPO, EmbedS->getType(), Value,
+                                    DestTy, FValue))
+            return false;
+          Result.getArrayInitializedElt(ArrayIndex) = APValue(FValue);
+        }
+        ArrayIndex++;
+      }
+    } else {
+      if (!Eval(Init, ArrayIndex))
+        return false;
+      ++ArrayIndex;
+    }
   }
 
   if (!Result.hasArrayFiller())
@@ -16363,6 +16415,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
   case Expr::SizeOfPackExprClass:
   case Expr::GNUNullExprClass:
   case Expr::SourceLocExprClass:
+  case Expr::EmbedExprClass:
     return NoDiag();
 
   case Expr::PackIndexingExprClass:
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index e61c0a70a0d8a..6362f47787cad 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1231,11 +1231,20 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
       }
     }
 
+    auto Eval = [&](Expr *Init, unsigned ElemIndex) {
+      return visitArrayElemInit(ElemIndex, Init);
+    };
+
     unsigned ElementIndex = 0;
     for (const Expr *Init : Inits) {
-      if (!this->visitArrayElemInit(ElementIndex, Init))
-        return false;
-      ++ElementIndex;
+      if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+        if (!EmbedS->doForEachDataElement(Eval, ElementIndex))
+          return false;
+      } else {
+        if (!this->visitArrayElemInit(ElementIndex, Init))
+          return false;
+        ++ElementIndex;
+      }
     }
 
     // Expand the filler expression.
@@ -1381,6 +1390,12 @@ bool ByteCodeExprGen<Emitter>::VisitConstantExpr(const ConstantExpr *E) {
   return this->delegate(E->getSubExpr());
 }
 
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitEmbedExpr(const EmbedExpr *E) {
+  auto It = E->begin();
+  return this->visit(*It);
+}
+
 static CharUnits AlignOfType(QualType T, const ASTContext &ASTCtx,
                              UnaryExprOrTypeTrait Kind) {
   bool AlignOfReturnsPreferred =
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index b0faac8020fb2..155bede340fb4 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -115,6 +115,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
   bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
   bool VisitGenericSelectionExpr(const GenericSelectionExpr *E);
   bool VisitChooseExpr(const ChooseExpr *E);
+  bool VisitEmbedExpr(const EmbedExpr *E);
   bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E);
   bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
   bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index ed9e6eeb36c75..eac1801445255 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4760,6 +4760,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
   case Expr::PseudoObjectExprClass:
   case Expr::AtomicExprClass:
   case Expr::SourceLocExprClass:
+  case Expr::EmbedExprClass:
   case Expr::BuiltinBitCastExprClass:
   {
     NotPrimaryExpr();
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 8f51d16b5db03..2d223a9c05f0a 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1177,6 +1177,10 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
   OS << Node->getBuiltinStr() << "()";
 }
 
+void StmtPrinter::VisitEmbedExpr(EmbedExpr *Node) {
+  assert(false && "not yet implemented");
+}
+
 void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
   PrintExpr(Node->getSubExpr());
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d1655905a6656..1add5caaf9f2e 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2313,6 +2313,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) {
   VisitExpr(E);
 }
 
+void StmtProfiler::VisitEmbedExpr(const EmbedExpr *E) { VisitExpr(E); }
+
 void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); }
 
 void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) {
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index bd1e630cd9047..a26f50f0719c1 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -2884,3 +2884,8 @@ void TextNodeDumper::VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S) {
   else
     OS << " parent: " << S->getParentComputeConstruct();
 }
+
+void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) {
+  AddChild("begin", [=] { OS << S->getStartingElementPos(); });
+  AddChild("number of elements", [=] { OS << S->getDataElementCount(); });
+}
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 1dc51deb82987..4509cee1ca0fe 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -530,13 +530,18 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
 
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
-                              bool RequiresNullTerminator) {
+                              bool RequiresNullTerminator,
+                              std::optional<int64_t> MaybeLimit) {
   const FileEntry *Entry = &FE.getFileEntry();
   // If the content is living on the file entry, return a reference to it.
   if (Entry->Content)
     return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef());
 
   uint64_t FileSize = Entry->getSize();
+
+  if (MaybeLimit)
+    FileSize = *MaybeLimit;
+
   // If there's a high enough chance that the file have changed since we
   // got its size, force a stat before opening it.
   if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index feea84544d62f..04cc9c7dadf86 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -425,8 +425,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // collisions (if there were, the switch below would complain about duplicate
   // case values).  Note that this depends on 'if' being null terminated.
 
-#define HASH(LEN, FIRST, THIRD) \
-  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
+#define HASH(LEN, FIRST, THIRD)                                                \
+  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
 #define CASE(LEN, FIRST, THIRD, NAME) \
   case HASH(LEN, FIRST, THIRD): \
     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -441,6 +441,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
+  CASE(5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index c369163ffaa9c..0c87558521924 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -506,6 +506,16 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
   uint64_t NumInitElements = Args.size();
 
   uint64_t NumArrayElements = AType->getNumElements();
+  for (const auto *Init : Args) {
+    if (const auto *Embed = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+      NumInitElements += Embed->getDataElementCount() - 1;
+      if (NumInitElements > NumArrayElements) {
+        NumInitElements = NumArrayElements;
+        break;
+      }
+    }
+  }
+
   assert(NumInitElements <= NumArrayElements);
 
   QualType elementType =
@@ -574,23 +584,37 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
 
   llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1);
 
-  // Emit the explicit initializers.
-  for (uint64_t i = 0; i != NumInitElements; ++i) {
+  auto Emit = [&](Expr *Init, uint64_t ArrayIndex) {
     llvm::Value *element = begin;
-    if (i > 0) {
-      element = Builder.CreateInBoundsGEP(llvmElementType, begin,
-                                          llvm::ConstantInt::get(CGF.SizeTy, i),
-                                          "arrayinit.element");
+    if (ArrayIndex > 0) {
+      element = Builder.CreateInBoundsGEP(
+          llvmElementType, begin,
+          llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex), "arrayinit.element");
 
       // Tell the cleanup that it needs to destroy up to this
       // element.  TODO: some of these stores can be trivially
       // observed to be unnecessary.
-      if (endOfInit.isValid()) Builder.CreateStore(element, endOfInit);
+      if (endOfInit.isValid())
+        Builder.CreateStore(element, endOfInit);
     }
 
     LValue elementLV = CGF.MakeAddrLValue(
         Address(element, llvmElementType, elementAlign), elementType);
-    EmitInitializationToLValue(Args[i], elementLV);
+    EmitInitializationToLValue(Init, elementLV);
+    return true;
+  };
+
+  unsigned ArrayIndex = 0;
+  // Emit the explicit initializers.
+  for (uint64_t i = 0; i != NumInitElements; ++i) {
+    if (ArrayIndex >= NumInitElements)
+      break;
+    if (auto *EmbedS = dyn_cast<EmbedExpr>(Args[i]->IgnoreParenImpCasts())) {
+      EmbedS->doForEachDataElement(Emit, ArrayIndex);
+    } else {
+      Emit(Args[i], ArrayIndex);
+      ArrayIndex++;
+    }
   }
 
   // Check whether there's a non-trivial array-fill expression.
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 0712f40fd8215..0fd3792c329cd 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1061,6 +1061,24 @@ class ConstExprEmitter
     return Visit(E->getInitializer(), T);
   }
 
+  llvm::Constant *ProduceIntToIntCast(const Expr *E, QualType DestType) {
+    QualType FromType = E->getType();
+    // See also HandleIntToIntCast in ExprConstant.cpp
+    if (FromType->isIntegerType())
+      if (llvm::Constant *C = Visit(E, FromType))
+        if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
+          unsigned SrcWidth = CGM.getContext().getIntWidth(FromType);
+          unsigned DstWidth = CGM.getContext().getIntWidth(DestType);
+          if (DstWidth == SrcWidth)
+            return CI;
+          llvm::APInt A = FromType->isSignedIntegerType()
+                              ? CI->getValue().sextOrTrunc(DstWidth)
+                              : CI->getValue().zextOrTrunc(DstWidth);
+          return llvm::ConstantInt::get(CGM.getLLVMContext(), A);
+        }
+    return nullptr;
+  }
+
   llvm::Constant *VisitCastExpr(const CastExpr *E, QualType destType) {
     if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E))
       CGM.EmitExplicitCastExprType(ECE, Emitter.CGF);
@@ -1142,23 +1160,8 @@ class ConstExprEmitter
     case CK_IntToOCLSampler:
       llvm_unreachable("global sampler variables are not generated");
 
-    case CK_IntegralCast: {
-      QualType FromType = subExpr->getType();
-      // See also HandleIntToIntCast in ExprConstant.cpp
-      if (FromType->isIntegerType())
-        if (llvm::Constant *C = Visit(subExpr, FromType))
-          if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
-            unsigned SrcWidth = CGM.getContext().getIntWidth(FromType);
-            unsigned DstWidth = CGM.getContext().getIntWidth(destType);
-            if (DstWidth == SrcWidth)
-              return CI;
-            llvm::APInt A = FromType->isSignedIntegerType()
-                                ? CI->getValue().sextOrTrunc(DstWidth)
-                                : CI->getValue().zextOrTrunc(DstWidth);
-            return llvm::ConstantInt::get(CGM.getLLVMContext(), A);
-          }
-      return nullptr;
-    }
+    case CK_IntegralCast:
+      return ProduceIntToIntCast(subExpr, destType);
 
     case CK_Dependent: llvm_unreachable("saw dependent cast!");
 
@@ -1249,15 +1252,42 @@ class ConstExprEmitter
     return llvm::ConstantInt::get(CGM.getLLVMContext(), I->getValue());
   }
 
+  /// Wraps \p Value in an APValue, converting it to a floating-point value
+  /// first when \p DestType is a floating type that differs from \p SrcType.
+  static APValue withDestType(ASTContext &Ctx, const Expr *E, QualType SrcType,
+                              QualType DestType, const llvm::APSInt &Value) {
+    if (Ctx.hasSameType(SrcType, DestType) || !DestType->isFloatingType())
+      return APValue(Value);
+
+    llvm::APFloat Converted(Ctx.getFloatTypeSemantics(DestType), 1);
+    llvm::RoundingMode Mode =
+        E->getFPFeaturesInEffect(Ctx.getLangOpts()).getRoundingMode();
+    // A dynamic rounding mode is treated as round-to-nearest-even here.
+    if (Mode == llvm::RoundingMode::Dynamic)
+      Mode = llvm::RoundingMode::NearestTiesToEven;
+    Converted.convertFromAPInt(Value, Value.isSigned(), Mode);
+    return APValue(Converted);
+  }
+
   llvm::Constant *EmitArrayInitialization(const InitListExpr *ILE, QualType T) {
     auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType());
     assert(CAT && "can't emit array init for non-constant-bound array");
+    uint64_t NumInitElements = ILE->getNumInits();
     const uint64_t NumElements = CAT->getZExtSize();
+    for (const auto *Init : ILE->inits()) {
+      if (const auto *Embed =
+              dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+        NumInitElements += Embed->getDataElementCount() - 1;
+        if (NumInitElements > NumElements) {
+          NumInitElements = NumElements;
+          break;
+        }
+      }
+    }
 
     // Initialising an array requires us to automatically
     // initialise any elements that have not been initialised explicitly
-    uint64_t NumInitableElts =
-        std::min<uint64_t>(ILE->getNumInits(), NumElements);
+    uint64_t NumInitableElts = std::min<uint64_t>(NumInitElements, NumElements);
 
     QualType EltType = CAT->getElementType();
 
@@ -1270,23 +1300,61 @@ class ConstExprEmitter
     }
 
     // Copy initializer elements.
-    SmallVector<llvm::Constant*, 16> Elts;
+    SmallVector<llvm::Constant *, 16> Elts;
     if (fillC && fillC->isNullValue())
       Elts.reserve(NumInitableElts + 1);
     else
       Elts.reserve(NumElements);
 
     llvm::Type *CommonElementType = nullptr;
-    for (unsigned i = 0; i < NumInitableElts; ++i) {
-      const Expr *Init = ILE->getInit(i);
-      llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
+    auto Emit = [&](const Expr *Init, unsigned ArrayIndex) {
+      llvm::Constant *C = nullptr;
+      C = Emitter.tryEmitPrivateForMemory(Init, EltType);
       if (!C)
-        return nullptr;
-      if (i == 0)
+        return false;
+      if (ArrayIndex == 0)
         CommonElementType = C->getType();
       else if (C->getType() != CommonElementType)
         CommonElementType = nullptr;
       Elts.push_back(C);
+      return true;
+    };
+
+    unsigned ArrayIndex = 0;
+    QualType DestTy = CAT->getElementType();
+    for (unsigned i = 0; i < ILE->getNumInits(); ++i) {
+      const Expr *Init = ILE->getInit(i);
+      if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+        StringLiteral *SL = EmbedS->getDataStringLiteral();
+        llvm::APSInt Value(CGM.getContext().getTypeSize(DestTy),
+                           DestTy->isUnsignedIntegerType());
+        llvm::Constant *C;
+        for (unsigned I = EmbedS->getStartingElementPos(),
+                      N = EmbedS->getDataElementCount();
+             I != EmbedS->getStartingElementPos() + N; ++I) {
+          Value = SL->getCodeUnit(I);
+          if (DestTy->isIntegerType()) {
+            C = llvm::ConstantInt::get(CGM.getLLVMContext(), Value);
+          } else {
+            C = Emitter.tryEmitPrivateForMemory(
+                withDestType(CGM.getContext(), Init, EmbedS->getType(), DestTy,
+                             Value),
+                EltType);
+          }
+          if (!C)
+            return nullptr;
+          Elts.push_back(C);
+          ArrayIndex++;
+        }
+        if ((ArrayIndex - EmbedS->getDataElementCount()) == 0)
+          CommonElementType = C->getType();
+        else if (C->getType() != CommonElementType)
+          CommonElementType = nullptr;
+      } else {
+        if (!Emit(Init, ArrayIndex))
+          return nullptr;
+        ArrayIndex++;
+      }
     }
 
     llvm::ArrayType *Desired =
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 7e76e57bc3f02..f40f3c273206b 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -506,6 +506,7 @@ class ScalarExprEmitter
   }
 
   Value *VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E);
+  Value *VisitEmbedExpr(EmbedExpr *E);
 
   Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) {
     if (E->isGLValue())
@@ -1796,6 +1797,12 @@ ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
                                                      "usn_addr_cast");
 }
 
+// Emits an EmbedExpr that holds exactly one data element as an integer value.
+Value *ScalarExprEmitter::VisitEmbedExpr(EmbedExpr *E) {
+  assert(E->getDataElementCount() == 1);
+  return Builder.getInt((*E->begin())->getValue());
+}
+
 Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
   // Vector Mask Case
   if (E->getNumSubExprs() == 2) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 331cf6e713d89..2ce9e2f4bcfcd 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1220,7 +1220,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   Args.addAllArgs(CmdArgs,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
-                   options::OPT_F, options::OPT_index_header_map});
+                   options::OPT_F, options::OPT_index_header_map,
+                   options::OPT_embed_dir_EQ});
 
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
@@ -8505,6 +8506,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // Pass along any -I options so we get proper .include search paths.
   Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
+  // Pass along any --embed-dir or similar options so we get proper embed paths.
+  Args.AddAllArgs(CmdArgs, options::OPT_embed_dir_EQ);
+
   // Determine the original source input.
   auto FindSource = [](const Action *S) -> const Action * {
     while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 58694e5399d58..cde4a84673b6e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4492,6 +4492,9 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
   if (Opts.DefineTargetOSMacros)
     GenerateArg(Consumer, OPT_fdefine_target_os_macros);
 
+  for (const auto &EmbedEntry : Opts.EmbedEntries)
+    GenerateArg(Consumer, OPT_embed_dir_EQ, EmbedEntry);
+
   // Don't handle LexEditorPlaceholders. It is implied by the action that is
   // generated elsewhere.
 }
@@ -4584,6 +4587,11 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     }
   }
 
+  for (const auto *A : Args.filtered(OPT_embed_dir_EQ)) {
+    StringRef Val = A->getValue();
+    Opts.EmbedEntries.push_back(std::string(Val));
+  }
+
   // Always avoid lexing editor placeholders when we're just running the
   // preprocessor as we never want to emit the
   // "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 369816e89e1d6..528eae2c5283e 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -62,6 +62,19 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
                                     /*IsMissing=*/false);
   }
 
+  /// Records the file named by an #embed directive as a dependency.
+  void EmbedDirective(SourceLocation, StringRef, bool,
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &) override {
+    assert(File && "expected to only be called when the file is found");
+    DepCollector.maybeAddDependency(
+        llvm::sys::path::remove_leading_dotslash(File->getName()),
+        /*FromModule=*/false,
+        /*IsSystem=*/false,
+        /*IsModuleFile=*/false,
+        /*IsMissing=*/false);
+  }
+
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -77,6 +90,18 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
     // Files that actually exist are handled by FileChanged.
   }
 
+  /// Records the file reported through the HasEmbed callback as a
+  /// dependency; does nothing when no file was found.
+  void HasEmbed(SourceLocation, StringRef, bool,
+                OptionalFileEntryRef File) override {
+    if (File)
+      DepCollector.maybeAddDependency(
+          llvm::sys::path::remove_leading_dotslash(File->getName()),
+          /*FromModule=*/false, /*IsSystem=*/false,
+          /*IsModuleFile=*/false,
+          /*IsMissing=*/false);
+  }
+
   void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
                   OptionalFileEntryRef File,
                   SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 20e5f233e224e..c23ce66a40dd0 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -43,7 +43,7 @@ class DependencyGraphCallback : public PPCallbacks {
 public:
   DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
                           StringRef SysRoot)
-    : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+      : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
 
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
@@ -53,6 +53,10 @@ class DependencyGraphCallback : public PPCallbacks {
                           bool ModuleImported,
                           SrcMgr::CharacteristicKind FileType) override;
 
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override;
+
   void EndOfMainFile() override {
     OutputGraphFile();
   }
@@ -86,6 +90,24 @@ void DependencyGraphCallback::InclusionDirective(
   AllFiles.insert(*FromFile);
 }
 
+// Records the embedded file as a dependency of the file containing #embed.
+void DependencyGraphCallback::EmbedDirective(SourceLocation HashLoc, StringRef,
+                                             bool, OptionalFileEntryRef File,
+                                             const LexEmbedParametersResult &) {
+  if (!File)
+    return;
+
+  SourceManager &Sources = PP->getSourceManager();
+  const FileID HostID = Sources.getFileID(Sources.getExpansionLoc(HashLoc));
+  OptionalFileEntryRef HostFile = Sources.getFileEntryRefForID(HostID);
+  if (!HostFile)
+    return;
+
+  Dependencies[*HostFile].push_back(*File);
+  AllFiles.insert(*File);
+  AllFiles.insert(*HostFile);
+}
+
 raw_ostream &
 DependencyGraphCallback::writeNodeReference(raw_ostream &OS,
                                             const FileEntry *Node) {
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index e8c8a5175f8f4..2d5c94c760252 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -508,6 +508,14 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__STDC_UTF_16__", "1");
   Builder.defineMacro("__STDC_UTF_32__", "1");
 
+  // __has_embed definitions
+  Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+  Builder.defineMacro("__STDC_EMBED_FOUND__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Found)));
+  Builder.defineMacro("__STDC_EMBED_EMPTY__",
+                      llvm::itostr(static_cast<int>(EmbedResult::Empty)));
+
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
 
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index a26d2c3ab8582..0592423c12eca 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Frontend/Utils.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Frontend/Utils.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Pragma.h"
@@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool DisableLineMarkers;
   bool DumpDefines;
   bool DumpIncludeDirectives;
+  bool DumpEmbedDirectives;
   bool UseLineDirectives;
   bool IsFirstFileEntered;
   bool MinimizeWhitespace;
@@ -100,6 +101,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool KeepSystemIncludes;
   raw_ostream *OrigOS;
   std::unique_ptr<llvm::raw_null_ostream> NullOS;
+  unsigned NumToksToSkip;
 
   Token PrevTok;
   Token PrevPrevTok;
@@ -107,14 +109,16 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 public:
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
                            bool defines, bool DumpIncludeDirectives,
-                           bool UseLineDirectives, bool MinimizeWhitespace,
-                           bool DirectivesOnly, bool KeepSystemIncludes)
+                           bool DumpEmbedDirectives, bool UseLineDirectives,
+                           bool MinimizeWhitespace, bool DirectivesOnly,
+                           bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
+        DumpEmbedDirectives(DumpEmbedDirectives),
         UseLineDirectives(UseLineDirectives),
         MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
-        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
+        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) {
     CurLine = 0;
     CurFilename += "<uninit>";
     EmittedTokensOnThisLine = false;
@@ -129,6 +133,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
     PrevPrevTok.startToken();
   }
 
+  /// Returns true if #embed directives should be expanded into a comma-
+  /// delimited list of integer constants rather than printed as directives.
+  bool expandEmbedContents() const { return !DumpEmbedDirectives; }
+
   bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
 
   void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
@@ -149,6 +157,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
                    FileID PrevFID) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      OptionalFileEntryRef File,
+                      const LexEmbedParametersResult &Params) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -232,6 +243,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
 
   void BeginModule(const Module *M);
   void EndModule(const Module *M);
+
+  unsigned GetNumToksToSkip() const { return NumToksToSkip; } // Pending skips.
+  void ResetSkipToks() { NumToksToSkip = 0; } // Clears the pending skip count.
 };
 }  // end anonymous namespace
 
@@ -399,6 +413,74 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
   }
 }
 
+/// Prints an #embed directive and its parameters when embed directives are
+/// being dumped, and counts the tokens the printing loop should then skip.
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    OptionalFileEntryRef File, const LexEmbedParametersResult &Params) {
+  if (!DumpEmbedDirectives)
+    return;
+
+  // The EmbedDirective() callback is called before we produce the annotation
+  // token stream for the directive. We skip printing the annotation tokens
+  // within PrintPreprocessedTokens(), but we also need to skip the prefix,
+  // suffix, and if_empty tokens as those are inserted directly into the token
+  // stream and would otherwise be printed immediately after printing the
+  // #embed directive.
+  //
+  // FIXME: counting tokens to skip is a kludge but we have no way to know
+  // which tokens were inserted as part of the embed and which ones were
+  // explicitly written by the user.
+  MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+  const char OpenDelim = IsAngled ? '<' : '"';
+  const char CloseDelim = IsAngled ? '>' : '"';
+  *OS << "#embed " << OpenDelim << FileName << CloseDelim;
+
+  // Writes a token-list parameter: Intro, the spelled tokens, then ")".
+  auto EmitTokenParam = [&](const char *Intro, llvm::ArrayRef<Token> Toks) {
+    *OS << Intro;
+    SmallString<128> Scratch;
+    for (const Token &T : Toks) {
+      if (T.hasLeadingSpace())
+        *OS << " ";
+      *OS << PP.getSpelling(T, Scratch);
+    }
+    *OS << ")";
+  };
+
+  bool SkipAnnotation = true;
+  if (Params.MaybeIfEmptyParam) {
+    EmitTokenParam(" if_empty(", Params.MaybeIfEmptyParam->Tokens);
+    // If the file is empty, we can skip those tokens. If the file is not
+    // empty, we skip the annotation tokens.
+    if (File && !File->getSize()) {
+      NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size();
+      SkipAnnotation = false;
+    }
+  }
+
+  if (Params.MaybeLimitParam)
+    *OS << " limit(" << Params.MaybeLimitParam->Limit << ")";
+  if (Params.MaybeOffsetParam)
+    *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")";
+
+  if (Params.MaybePrefixParam) {
+    EmitTokenParam(" prefix(", Params.MaybePrefixParam->Tokens);
+    NumToksToSkip += Params.MaybePrefixParam->Tokens.size();
+  }
+  if (Params.MaybeSuffixParam) {
+    EmitTokenParam(" suffix(", Params.MaybeSuffixParam->Tokens);
+    NumToksToSkip += Params.MaybeSuffixParam->Tokens.size();
+  }
+
+  // We may need to skip the annotation token.
+  if (SkipAnnotation)
+    ++NumToksToSkip;
+
+  *OS << " /* clang -E -dE */";
+  setEmittedDirectiveOnThisLine();
+}
+
 void PrintPPOutputPPCallbacks::InclusionDirective(
     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -678,7 +760,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
   if (Tok.is(tok::eof) ||
       (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
        !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
-       !Tok.is(tok::annot_repl_input_end)))
+       !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
     return;
 
   // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -878,6 +960,27 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       std::string Name = M->getFullModuleName();
       Callbacks->OS->write(Name.data(), Name.size());
       Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
+    } else if (Tok.is(tok::annot_embed)) {
+      // Manually explode the binary data out to a stream of comma-delimited
+      // integer values. If the user passed -dE, that is handled by the
+      // EmbedDirective() callback. We should only get here if the user did not
+      // pass -dE.
+      assert(Callbacks->expandEmbedContents() &&
+             "did not expect an embed annotation");
+      auto *Data =
+          reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+
+      // Loop over the contents and print them as a comma-delimited list of
+      // values.
+      bool PrintComma = false;
+      for (auto Iter = Data->BinaryData.begin(), End = Data->BinaryData.end();
+           Iter != End; ++Iter) {
+        if (PrintComma)
+          *Callbacks->OS << ", ";
+        *Callbacks->OS << static_cast<unsigned>(*Iter);
+        PrintComma = true;
+      }
+      IsStartOfLine = true;
     } else if (Tok.isAnnotation()) {
       // Ignore annotation tokens created by pragmas - the pragmas themselves
       // will be reproduced in the preprocessed output.
@@ -926,6 +1029,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
     if (Tok.is(tok::eof)) break;
 
     PP.Lex(Tok);
+    // If lexing that token causes us to need to skip future tokens, do so now.
+    for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I)
+      PP.Lex(Tok);
+    Callbacks->ResetSkipToks();
   }
 }
 
@@ -982,8 +1089,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
-      Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+      Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+      Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
   // the majority of pragmas in such a file will be Microsoft pragmas.
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 8e7386449dced..b7ee0c0edb053 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -19,6 +19,7 @@
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/HeaderSearch.h"
@@ -39,6 +40,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
@@ -82,8 +84,7 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
 
 /// Read and discard all tokens remaining on the current line until
 /// the tok::eod token is found.
-SourceRange Preprocessor::DiscardUntilEndOfDirective() {
-  Token Tmp;
+SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
   SourceRange Res;
 
   LexUnexpandedToken(Tmp);
@@ -1073,6 +1074,74 @@ OptionalFileEntryRef Preprocessor::LookupFile(
   return std::nullopt;
 }
 
+/// Finds the file named by an #embed directive.
+///
+/// An absolute \p Filename is looked up as-is. Otherwise, unless \p isAngled,
+/// the name is tried relative to the directory of \p LookupFromFile (when its
+/// real path is known) and then relative to the working directory. Finally
+/// the name is tried under each entry of PPOpts->EmbedEntries, in order.
+///
+/// \returns the first file found, or std::nullopt if every lookup fails.
+OptionalFileEntryRef
+Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+                              const FileEntry *LookupFromFile) {
+  FileManager &FM = this->getFileManager();
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // lookup path or immediately fail
+    return llvm::expectedToOptional(FM.getFileRef(Filename, OpenFile));
+  }
+
+  // Otherwise, it's search time!
+  SmallString<512> LookupPath;
+
+  // Looks up Filename under StartingFrom. When StartingFrom names a file,
+  // StripFilename drops its last component so only its directory is used.
+  auto LookupIn = [&](StringRef StartingFrom,
+                      bool StripFilename) -> OptionalFileEntryRef {
+    LookupPath.clear();
+    llvm::sys::path::native(StartingFrom, LookupPath);
+    if (StripFilename)
+      llvm::sys::path::remove_filename(LookupPath);
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back()))
+      LookupPath.push_back(llvm::sys::path::get_separator().front());
+    LookupPath.append(Filename.begin(), Filename.end());
+    // A miss is not an error: drop it so the caller can try the next place.
+    return llvm::expectedToOptional(FM.getFileRef(LookupPath, OpenFile));
+  };
+
+  // Non-angled lookup
+  if (!isAngled) {
+    if (LookupFromFile) {
+      // Use file-based lookup.
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty())
+        if (OptionalFileEntryRef Found = LookupIn(FullFileDir, true))
+          return Found;
+    }
+
+    // Otherwise, do working directory lookup. Converting to an optional
+    // consumes the error if the working directory cannot be resolved, rather
+    // than leaving an unchecked llvm::Expected behind.
+    auto MaybeWorkingDirEntry =
+        llvm::expectedToOptional(FM.getDirectoryRef("."));
+    if (MaybeWorkingDirEntry) {
+      StringRef WorkingDir = MaybeWorkingDirEntry->getName();
+      if (!WorkingDir.empty())
+        if (OptionalFileEntryRef Found = LookupIn(WorkingDir, false))
+          return Found;
+    }
+  }
+
+  // Search every embed entry in order. A miss in one entry must not end the
+  // search, so return only once a file has actually been found.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    if (OptionalFileEntryRef Found = LookupIn(Entry, false))
+      return Found;
+  }
+  return std::nullopt;
+}
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
@@ -1168,6 +1237,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp_include_next:
       case tok::pp___include_macros:
       case tok::pp_pragma:
+      case tok::pp_embed:
         Diag(Result, diag::err_embedded_directive) << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
@@ -1282,6 +1352,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleIdentSCCSDirective(Result);
     case tok::pp_sccs:
       return HandleIdentSCCSDirective(Result);
+    case tok::pp_embed:
+      return HandleEmbedDirective(SavedHash.getLocation(), Result,
+                                  getCurrentFileLexer()
+                                      ? *getCurrentFileLexer()->getFileEntry()
+                                      : static_cast<FileEntry *>(nullptr));
     case tok::pp_assert:
       //isExtension = true;  // FIXME: implement #assert
       break;
@@ -3543,3 +3618,401 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
 }
+
+/// Lex the embed parameter sequence that follows the resource name in a
+/// '#embed' directive or a '__has_embed' expression.
+///
+/// \param CurTok receives every token lexed here; the first parameter token
+/// is lexed by this function, and on return CurTok is the token at which
+/// lexing stopped.
+/// \param ForHasEmbed true when lexing inside '__has_embed(...)': the
+/// sequence then also ends at a ')' and unrecognized parameters are only
+/// counted instead of being diagnosed.
+///
+/// \returns the lexed parameters, or std::nullopt once an error has been
+/// diagnosed. Every failure path discards the rest of the directive, so
+/// CurTok is then the tok::eod token.
+std::optional<LexEmbedParametersResult>
+Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
+  LexEmbedParametersResult Result{};
+  tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
+  Result.ParamRange = {CurTok.getLocation(), CurTok.getLocation()};
+
+  // Consume whatever is left of the directive. All error paths go through
+  // this so that a failed lex always leaves CurTok on tok::eod.
+  auto SkipToEOD = [&]() {
+    if (CurTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective(CurTok);
+  };
+
+  // Diagnose a closing bracket that does not match the innermost open one
+  // (described by Matches), then give up on the directive.
+  auto DiagMismatchedBracesAndSkipToEOD =
+      [&](tok::TokenKind Expected,
+          std::pair<tok::TokenKind, SourceLocation> Matches) {
+        Result.ParamRange.setEnd(CurTok.getEndLoc());
+        Diag(CurTok, diag::err_expected) << Expected;
+        Diag(Matches.second, diag::note_matching) << Matches.first;
+        SkipToEOD();
+      };
+
+  // Returns true if CurTok has the given kind; otherwise diagnoses, skips to
+  // the end of the directive and returns false.
+  auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
+    if (CurTok.isNot(Kind)) {
+      Result.ParamRange.setEnd(CurTok.getEndLoc());
+      Diag(CurTok, diag::err_expected) << Kind;
+      SkipToEOD();
+      return false;
+    }
+    return true;
+  };
+
+  // C23 6.10:
+  // pp-parameter-name:
+  //   pp-standard-parameter
+  //   pp-prefixed-parameter
+  //
+  // pp-standard-parameter:
+  //   identifier
+  //
+  // pp-prefixed-parameter:
+  //   identifier :: identifier
+  auto LexPPParameterName = [&]() -> std::optional<std::string> {
+    // We expect the current token to be an identifier; if it's not, things
+    // have gone wrong.
+    if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
+      return std::nullopt;
+
+    const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
+
+    // Lex another token; it is either a :: or we're done with the parameter
+    // name.
+    LexNonComment(CurTok);
+    if (CurTok.is(tok::coloncolon)) {
+      // We found a ::, so lex another identifier token.
+      LexNonComment(CurTok);
+      if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
+        return std::nullopt;
+
+      const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
+
+      // Lex another token so we're past the name.
+      LexNonComment(CurTok);
+      return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
+    }
+    return Prefix->getName().str();
+  };
+
+  // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
+  // this document as an identifier pp_param and an identifier of the form
+  // __pp_param__ shall behave the same when used as a preprocessor parameter,
+  // except for the spelling.
+  auto NormalizeParameterName = [](StringRef Name) {
+    if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
+      return Name.substr(2, Name.size() - 4);
+    return Name;
+  };
+
+  // Lex '(' constant-expression ')' and return its non-negative value. The
+  // expression is processed using the evaluation rules from #if.
+  auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
+    if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
+      return std::nullopt;
+
+    // We do not consume the ( because EvaluateDirectiveExpression will lex
+    // the next token for us.
+    IdentifierInfo *ParameterIfNDef = nullptr;
+    bool EvaluatedDefined;
+    DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
+        ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
+
+    if (!LimitEvalResult.Value) {
+      // If there was an error evaluating the directive expression, we expect
+      // to be at the end of directive token.
+      assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
+      return std::nullopt;
+    }
+
+    if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
+      return std::nullopt;
+
+    // Eat the ).
+    LexNonComment(CurTok);
+
+    // C23 6.10.3.2p2: The token defined shall not appear within the constant
+    // expression.
+    if (EvaluatedDefined) {
+      Diag(CurTok, diag::err_defined_in_pp_embed);
+      SkipToEOD();
+      return std::nullopt;
+    }
+
+    const llvm::APSInt &Value = *LimitEvalResult.Value;
+    if (Value.isNegative()) {
+      Diag(CurTok, diag::err_requires_positive_value)
+          << toString(Value, 10) << /*positive*/ 0;
+      SkipToEOD();
+      return std::nullopt;
+    }
+    return Value.getLimitedValue();
+  };
+
+  auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
+    switch (Kind) {
+    case tok::l_paren:
+      return tok::r_paren;
+    case tok::l_brace:
+      return tok::r_brace;
+    case tok::l_square:
+      return tok::r_square;
+    default:
+      llvm_unreachable("should not get here");
+    }
+  };
+
+  // Lex '(' balanced-token-sequence ')' and append the tokens between the
+  // outer parentheses to Tokens. Returns false after diagnosing an error.
+  auto LexParenthesizedBalancedTokenSoup =
+      [&](llvm::SmallVectorImpl<Token> &Tokens) {
+        // Brackets opened inside the argument that still await their closer,
+        // innermost last. The outer '(' is not tracked here.
+        std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
+
+        // We expect the current token to be a left paren.
+        if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
+          return false;
+        LexNonComment(CurTok); // Eat the (
+
+        // The argument ends at the first ')' seen while no inner bracket is
+        // open; a ')' at any deeper nesting level belongs to the soup.
+        while (CurTok.isNot(tok::eod) &&
+               (!BracketStack.empty() || CurTok.isNot(tok::r_paren))) {
+          switch (CurTok.getKind()) {
+          default: // Shutting up diagnostics about not fully-covered switch.
+            break;
+          case tok::l_paren:
+          case tok::l_brace:
+          case tok::l_square:
+            BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
+            break;
+          case tok::r_paren:
+          case tok::r_brace:
+          case tok::r_square: {
+            // A '}' or ']' with nothing open has no opener to point at; the
+            // only closer acceptable at this depth is the final ')'.
+            if (BracketStack.empty()) {
+              ExpectOrDiagAndSkipToEOD(tok::r_paren);
+              return false;
+            }
+            tok::TokenKind Matching =
+                GetMatchingCloseBracket(BracketStack.back().first);
+            if (CurTok.getKind() != Matching) {
+              DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
+              return false;
+            }
+            BracketStack.pop_back();
+          } break;
+          }
+          Tokens.push_back(CurTok);
+          LexNonComment(CurTok);
+        }
+
+        // When we're done, we want to eat the closing paren.
+        if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
+          return false;
+
+        LexNonComment(CurTok); // Eat the )
+        return true;
+      };
+
+  LexNonComment(CurTok); // Prime the pump.
+  while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
+    SourceLocation ParamStartLoc = CurTok.getLocation();
+    std::optional<std::string> ParamName = LexPPParameterName();
+    if (!ParamName)
+      return std::nullopt;
+    StringRef Parameter = NormalizeParameterName(*ParamName);
+
+    // Lex the parameters (dependent on the parameter type we want!).
+    //
+    // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
+    // one time in the embed parameter sequence.
+    if (Parameter == "limit") {
+      if (Result.MaybeLimitParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
+      if (!Limit)
+        return std::nullopt;
+      Result.MaybeLimitParam =
+          PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "clang::offset") {
+      if (Result.MaybeOffsetParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
+      if (!Offset)
+        return std::nullopt;
+      Result.MaybeOffsetParam = PPEmbedParameterOffset{
+          *Offset, {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "prefix") {
+      if (Result.MaybePrefixParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybePrefixParam = PPEmbedParameterPrefix{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "suffix") {
+      if (Result.MaybeSuffixParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybeSuffixParam = PPEmbedParameterSuffix{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "if_empty") {
+      if (Result.MaybeIfEmptyParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else {
+      ++Result.UnrecognizedParams;
+
+      // If there's a left paren, we need to parse a balanced token sequence
+      // and just eat those tokens.
+      if (CurTok.is(tok::l_paren)) {
+        SmallVector<Token, 4> Soup;
+        if (!LexParenthesizedBalancedTokenSoup(Soup))
+          return std::nullopt;
+      }
+      if (!ForHasEmbed) {
+        Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
+        SkipToEOD();
+        return std::nullopt;
+      }
+    }
+  }
+  Result.ParamRange.setEnd(CurTok.getLocation());
+  return Result;
+}
+
+/// Push the tokens an '#embed' directive expands to onto the token stream.
+///
+/// With no binary contents only the if_empty tokens (if any) are entered.
+/// Otherwise the stream is: prefix tokens, one tok::annot_embed annotation
+/// token carrying the file name and data, then suffix tokens.
+void Preprocessor::HandleEmbedDirectiveImpl(
+    SourceLocation HashLoc, StringRef ResolvedFilename,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents) {
+  if (BinaryContents.empty()) {
+    // Nothing to embed: the if_empty tokens are all there is to emit.
+    // FIXME: this loses AST fidelity; nothing in the compiler will see that
+    // these tokens came from #embed. We have to hack around this when printing
+    // preprocessed output. The same is true for prefix and suffix tokens.
+    if (!Params.MaybeIfEmptyParam)
+      return;
+
+    ArrayRef<Token> IfEmptyToks = Params.MaybeIfEmptyParam->Tokens;
+    size_t NumIfEmptyToks = IfEmptyToks.size();
+    auto Stream = std::make_unique<Token[]>(NumIfEmptyToks);
+    llvm::copy(IfEmptyToks, Stream.get());
+    EnterTokenStream(std::move(Stream), NumIfEmptyToks, true, true);
+    return;
+  }
+
+  size_t PrefixLen = Params.PrefixTokenCount();
+  size_t SuffixLen = Params.SuffixTokenCount();
+  // One extra slot for the annotation token sitting between prefix and
+  // suffix.
+  size_t StreamLen = 1 + PrefixLen + SuffixLen;
+  auto Stream = std::make_unique<Token[]>(StreamLen);
+  Token *Out = Stream.get();
+
+  // Prefix tokens come first, when present.
+  if (Params.MaybePrefixParam) {
+    llvm::copy(Params.MaybePrefixParam->Tokens, Out);
+    Out += PrefixLen;
+  }
+
+  // The payload handed to the annotation token is allocated from BP.
+  EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
+  Data->FileName = ResolvedFilename;
+  Data->BinaryData = BinaryContents;
+
+  Token &Annot = *Out;
+  Annot.startToken();
+  Annot.setKind(tok::annot_embed);
+  Annot.setAnnotationRange(HashLoc);
+  Annot.setAnnotationValue(Data);
+  ++Out;
+
+  // Suffix tokens go last, when present.
+  if (Params.MaybeSuffixParam) {
+    llvm::copy(Params.MaybeSuffixParam->Tokens, Out);
+    Out += SuffixLen;
+  }
+
+  assert(Out == Stream.get() + StreamLen &&
+         "Calculated the incorrect number of tokens");
+  EnterTokenStream(std::move(Stream), StreamLen, true, true);
+}
+
+/// Handle a '#embed' directive: lex the resource name and the embed
+/// parameters, locate and load the resource, apply the clang::offset and
+/// limit parameters, notify the callbacks and enter the resulting tokens.
+///
+/// \param HashLoc location of the '#' that introduced the directive.
+/// \param EmbedTok the 'embed' token; used to place the extension and
+/// compatibility diagnostics.
+/// \param LookupFromFile file the lookup is relative to; passed through to
+/// LookupEmbedFile and may be null.
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // Give the usual extension/compatibility warnings.
+  if (LangOpts.C23)
+    Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
+  else
+    Diag(EmbedTok, diag::ext_pp_embed_directive)
+        << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  std::optional<LexEmbedParametersResult> Params =
+      LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
+
+  assert((Params || CurTok.is(tok::eod)) &&
+         "expected success or to be at the end of the directive");
+  if (!Params)
+    return;
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error. That is a property of the user's input rather than an internal
+  // invariant, so bail out instead of asserting and then looking up an empty
+  // name in builds without assertions.
+  if (Filename.empty())
+    return;
+
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
+  if (!MaybeFileRef) {
+    // could not find file
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
+      return;
+    }
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+  std::optional<llvm::MemoryBufferRef> MaybeFile =
+      getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
+  if (!MaybeFile) {
+    // The file was found, but its contents could not be loaded.
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile->getBuffer();
+
+  // The order is important between 'offset' and 'limit'; we want to offset
+  // first and then limit second; otherwise we may reduce the notional resource
+  // size to something too small to offset into.
+  if (Params->MaybeOffsetParam) {
+    // FIXME: just like with the limit() and if_empty() parameters, this loses
+    // source fidelity in the AST; it has no idea that there was an offset
+    // involved.
+    // offsets all the way to the end of the file make for an empty file.
+    BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
+  }
+
+  if (Params->MaybeLimitParam) {
+    // FIXME: just like with the clang::offset() and if_empty() parameters,
+    // this loses source fidelity in the AST; it has no idea there was a limit
+    // involved.
+    BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
+  }
+
+  if (Callbacks)
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
+                              *Params);
+  HandleEmbedDirectiveImpl(HashLoc, Filename, *Params, BinaryContents);
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index f267efabd617f..8bb82bd22eb98 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -870,7 +870,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
 /// may occur after a #if or #elif directive.  If the expression is equivalent
 /// to "!defined(X)" return X in IfNDefMacro.
 Preprocessor::DirectiveEvalResult
-Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                          Token &Tok, bool &EvaluatedDefined,
+                                          bool CheckForEoD) {
   SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -882,7 +884,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
   DisableMacroExpansion = false;
 
   // Peek ahead one token.
-  Token Tok;
   LexNonComment(Tok);
 
   // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
@@ -895,7 +896,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
     // Parse error, skip the rest of the macro line.
     SourceRange ConditionRange = ExprStartLoc;
     if (Tok.isNot(tok::eod))
-      ConditionRange = DiscardUntilEndOfDirective();
+      ConditionRange = DiscardUntilEndOfDirective(Tok);
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
@@ -903,11 +904,14 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
     // We cannot trust the source range from the value because there was a
     // parse error. Track the range manually -- the end of the directive is the
     // end of the condition range.
-    return {false,
+    return {std::nullopt,
+            false,
             DT.IncludedUndefinedIds,
             {ExprStartLoc, ConditionRange.getEnd()}};
   }
 
+  EvaluatedDefined = DT.State != DefinedTracker::Unknown;
+
   // If we are at the end of the expression after just parsing a value, there
   // must be no (unparenthesized) binary operators involved, so we can exit
   // directly.
@@ -919,7 +923,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+    bool IsNonZero = ResVal.Val != 0;
+    SourceRange ValRange = ResVal.getRange();
+    return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+            ValRange};
   }
 
   // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
@@ -928,21 +935,37 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
                                Tok, true, DT.IncludedUndefinedIds, *this)) {
     // Parse error, skip the rest of the macro line.
     if (Tok.isNot(tok::eod))
-      DiscardUntilEndOfDirective();
+      DiscardUntilEndOfDirective(Tok);
 
     // Restore 'DisableMacroExpansion'.
     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+    SourceRange ValRange = ResVal.getRange();
+    return {std::nullopt, false, DT.IncludedUndefinedIds, ValRange};
   }
 
-  // If we aren't at the tok::eod token, something bad happened, like an extra
-  // ')' token.
-  if (Tok.isNot(tok::eod)) {
-    Diag(Tok, diag::err_pp_expected_eol);
-    DiscardUntilEndOfDirective();
+  if (CheckForEoD) {
+    // If we aren't at the tok::eod token, something bad happened, like an extra
+    // ')' token.
+    if (Tok.isNot(tok::eod)) {
+      Diag(Tok, diag::err_pp_expected_eol);
+      DiscardUntilEndOfDirective(Tok);
+    }
   }
 
+  EvaluatedDefined = EvaluatedDefined || DT.State != DefinedTracker::Unknown;
+
   // Restore 'DisableMacroExpansion'.
   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-  return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+  bool IsNonZero = ResVal.Val != 0;
+  SourceRange ValRange = ResVal.getRange();
+  return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// Convenience overload for callers that need neither the last lexed token
+/// nor the "expression used 'defined'" flag: both are received into
+/// throwaway locals and dropped.
+Preprocessor::DirectiveEvalResult
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                          bool CheckForEoD) {
+  bool IgnoredEvaluatedDefined;
+  Token IgnoredTok;
+  return EvaluateDirectiveExpression(IfNDefMacro, IgnoredTok,
+                                     IgnoredEvaluatedDefined, CheckForEoD);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index f085b94371644..3913ff08c2eb5 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
     Ident__has_c_attribute = nullptr;
 
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+  Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
   Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning      = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1279,6 +1280,105 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
   return File.has_value();
 }
 
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+///
+/// \param Tok the current token; on entry the '__has_embed' identifier, on
+/// success the closing ')'.
+/// \param II identifier info of '__has_embed', used in diagnostics.
+///
+/// \returns EmbedResult::Invalid if the expression is malformed or appears
+/// outside of a preprocessor conditional (a diagnostic has been issued),
+/// EmbedResult::NotFound if an unrecognized parameter was given or the
+/// resource cannot be found, EmbedResult::Empty if no data is left once the
+/// offset and limit parameters are applied, and EmbedResult::Found otherwise.
+EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(Tok, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+    assert(Tok.is(tok::identifier));
+    Tok.setIdentifierInfo(II);
+    return EmbedResult::Invalid;
+  }
+
+  // Ensure we have a '('.
+  LexUnexpandedToken(Tok);
+  if (Tok.isNot(tok::l_paren)) {
+    // No attempt is made to recover by guessing at a filename.
+    Diag(Tok, diag::err_pp_expected_after) << II << tok::l_paren;
+    return EmbedResult::Invalid;
+  }
+
+  // Save '(' location for possible missing ')' message and then lex the header
+  // name token for the embed resource.
+  SourceLocation LParenLoc = Tok.getLocation();
+  if (this->LexHeaderName(Tok))
+    return EmbedResult::Invalid;
+
+  if (Tok.isNot(tok::header_name)) {
+    Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+    return EmbedResult::Invalid;
+  }
+
+  SourceLocation FilenameLoc = Tok.getLocation();
+  Token FilenameTok = Tok;
+
+  std::optional<LexEmbedParametersResult> Params =
+      this->LexEmbedParameters(Tok, /*ForHasEmbed=*/true);
+  assert((Params || Tok.is(tok::eod)) &&
+         "expected success or to be at the end of the directive");
+
+  if (!Params)
+    return EmbedResult::Invalid;
+
+  if (Params->UnrecognizedParams > 0)
+    return EmbedResult::NotFound;
+
+  if (!Tok.is(tok::r_paren)) {
+    Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+        << II << tok::r_paren;
+    Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+    if (Tok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return EmbedResult::Invalid;
+  }
+
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
+  bool isAngled =
+      this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error. That depends on the user's input, so report the expression as
+  // invalid instead of asserting and then looking up an empty name in builds
+  // without assertions.
+  if (Filename.empty())
+    return EmbedResult::Invalid;
+
+  const FileEntry *LookupFromFile =
+      this->getCurrentFileLexer() ? *this->getCurrentFileLexer()->getFileEntry()
+                                  : static_cast<FileEntry *>(nullptr);
+  OptionalFileEntryRef MaybeFileEntry =
+      this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
+  if (Callbacks) {
+    Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+  }
+  if (!MaybeFileEntry)
+    return EmbedResult::NotFound;
+
+  size_t FileSize = MaybeFileEntry->getSize();
+  // First, "offset" into the file (this reduces the amount of data we can read
+  // from the file).
+  if (Params->MaybeOffsetParam) {
+    if (Params->MaybeOffsetParam->Offset > FileSize)
+      FileSize = 0;
+    else
+      FileSize -= Params->MaybeOffsetParam->Offset;
+  }
+
+  // Second, limit the data from the file (this also reduces the amount of data
+  // we can read from the file). A limit larger than what is left does not
+  // change anything: the '#embed' directive clamps the limit to the available
+  // data, and '__has_embed' has to agree with it.
+  if (Params->MaybeLimitParam) {
+    if (Params->MaybeLimitParam->Limit < FileSize)
+      FileSize = Params->MaybeLimitParam->Limit;
+  }
+
+  // If we have no data left to read, the file is empty, otherwise we have the
+  // expected resource.
+  if (FileSize == 0)
+    return EmbedResult::Empty;
+  return EmbedResult::Found;
+}
+
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
   return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
 }
@@ -1820,6 +1920,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
       return;
     OS << (int)Value;
     Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_embed) {
+    // The argument to these two builtins should be a parenthesized
+    // file name string literal using angle brackets (<>) or
+    // double-quotes (""), optionally followed by a series of
+    // arguments similar to form like attributes.
+    EmbedResult Value = EvaluateHasEmbed(Tok, II);
+    if (Value == EmbedResult::Invalid)
+      return;
+
+    Tok.setKind(tok::numeric_constant);
+    OS << static_cast<int>(Value);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.
     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 1b3201bd805bf..865879d180533 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -193,9 +193,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
   if (Tok.isAnnotation()) {
     // Modules annotation can show up when generated automatically for includes.
     assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
-                       tok::annot_module_end) &&
+                       tok::annot_module_end, tok::annot_embed) &&
            "unexpected annotation in AvoidConcat");
+
     ConcatInfo = 0;
+    if (Tok.is(tok::annot_embed))
+      return true;
   }
 
   if (ConcatInfo == 0)
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index eb7447fa038e4..9fc3cd73f73a0 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1066,6 +1066,21 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
     break;
   }
 
+  case tok::annot_embed: {
+    // We've met #embed in a context where a single value is expected. Take last
+    // element from #embed data as if it were a comma expression.
+    EmbedAnnotationData *Data =
+        reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+    SourceLocation StartLoc = ConsumeAnnotationToken();
+    ASTContext &Context = Actions.getASTContext();
+    Res = IntegerLiteral::Create(Context,
+                                 llvm::APInt(CHAR_BIT, Data->BinaryData.back()),
+                                 Context.UnsignedCharTy, StartLoc);
+    if (Data->BinaryData.size() > 1)
+      Diag(StartLoc, diag::warn_unused_comma_left_operand);
+    break;
+  }
+
   case tok::kw___super:
   case tok::kw_decltype:
     // Annotate the token and tail recurse.
@@ -3563,6 +3578,17 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS,
                                   T.getCloseLocation());
 }
 
+/// Expand the tok::annot_embed token at the current position into one
+/// 'unsigned char' integer literal per embedded byte, appended to Exprs.
+/// The annotation token is consumed; all literals share its location.
+void Parser::ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs) {
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  ASTContext &Context = Actions.getASTContext();
+  Exprs.reserve(Exprs.size() + Data->BinaryData.size());
+  // Iterate as unsigned char: where plain char is signed, a byte >= 0x80
+  // would otherwise be sign-extended to 64 bits before reaching the
+  // CHAR_BIT-wide APInt.
+  for (unsigned char Byte : Data->BinaryData) {
+    Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                           Context.UnsignedCharTy, StartLoc));
+  }
+}
+
 /// ParseExpressionList - Used for C/C++ (argument-)expression-list.
 ///
 /// \verbatim
@@ -3598,8 +3624,17 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs,
     if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
       Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
       Expr = ParseBraceInitializer();
-    } else
+    } else if (Tok.is(tok::annot_embed)) {
+      ExpandEmbedDirective(Exprs);
+      if (Tok.isNot(tok::comma))
+        break;
+      Token Comma = Tok;
+      ConsumeToken();
+      checkPotentialAngleBracketDelimiter(Comma);
+      continue;
+    } else {
       Expr = ParseAssignmentExpression();
+    }
 
     if (EarlyTypoCorrection)
       Expr = Actions.CorrectDelayedTyposInExpr(Expr);
diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp
index 432ddc74b1087..cd11f905e856a 100644
--- a/clang/lib/Parse/ParseInit.cpp
+++ b/clang/lib/Parse/ParseInit.cpp
@@ -428,6 +428,36 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator(
   return ExprError();
 }
 
+/// Build the initializer-list element for the tok::annot_embed token at the
+/// current position and consume that token.
+///
+/// A resource of exactly one byte becomes a plain 'unsigned char' integer
+/// literal. Anything else is handed to Sema::ActOnEmbedExpr together with
+/// string literals holding the file name and the binary data.
+ExprResult Parser::createEmbedExpr() {
+  assert(Tok.getKind() == tok::annot_embed);
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  ExprResult Res;
+  ASTContext &Context = Actions.getASTContext();
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  if (Data->BinaryData.size() == 1) {
+    // Convert to unsigned char first: where plain char is signed, a byte
+    // >= 0x80 would otherwise be sign-extended to 64 bits before reaching the
+    // CHAR_BIT-wide APInt.
+    const unsigned char Byte =
+        static_cast<unsigned char>(Data->BinaryData.back());
+    Res = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                 Context.UnsignedCharTy, StartLoc);
+  } else {
+    // Wrap Str in an ordinary string literal typed as an array of Ty with
+    // exactly Str.size() elements.
+    auto CreateStringLiteralFromStringRef = [&](StringRef Str, QualType Ty) {
+      llvm::APSInt ArraySize =
+          Context.MakeIntValue(Str.size(), Context.getSizeType());
+      QualType ArrayTy = Context.getConstantArrayType(
+          Ty, ArraySize, nullptr, ArraySizeModifier::Normal, 0);
+      return StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary,
+                                   false, ArrayTy, StartLoc);
+    };
+
+    StringLiteral *FileNameArg =
+        CreateStringLiteralFromStringRef(Data->FileName, Context.CharTy);
+    StringLiteral *BinaryDataArg = CreateStringLiteralFromStringRef(
+        Data->BinaryData, Context.UnsignedCharTy);
+    Res = Actions.ActOnEmbedExpr(StartLoc, FileNameArg, BinaryDataArg);
+  }
+  return Res;
+}
+
 /// ParseBraceInitializer - Called when parsing an initializer that has a
 /// leading open brace.
 ///
@@ -501,6 +531,8 @@ ExprResult Parser::ParseBraceInitializer() {
     ExprResult SubElt;
     if (MayBeDesignationStart())
       SubElt = ParseInitializerWithPotentialDesignator(DesignatorCompletion);
+    else if (Tok.getKind() == tok::annot_embed)
+      SubElt = createEmbedExpr();
     else
       SubElt = ParseInitializer();
 
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index a5130f56600e5..7e30afa2c64a4 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1523,6 +1523,19 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
                                 ExprArg.get(), Loc);
 }
 
+/// Expand the tok::annot_embed token at the current position into one
+/// non-type template argument per embedded byte, appended to TemplateArgs.
+/// Each argument is an 'unsigned char' integer literal located at the
+/// annotation token, which is consumed.
+void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) {
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  ASTContext &Context = Actions.getASTContext();
+  // Iterate as unsigned char: where plain char is signed, a byte >= 0x80
+  // would otherwise be sign-extended to 64 bits before reaching the
+  // CHAR_BIT-wide APInt.
+  for (unsigned char Byte : Data->BinaryData) {
+    Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                     Context.UnsignedCharTy, StartLoc);
+    TemplateArgs.push_back(
+        ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc));
+  }
+}
+
 /// ParseTemplateArgumentList - Parse a C++ template-argument-list
 /// (C++ [temp.names]). Returns true if there was an error.
 ///
@@ -1547,19 +1560,23 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
 
   do {
     PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp);
-    ParsedTemplateArgument Arg = ParseTemplateArgument();
-    SourceLocation EllipsisLoc;
-    if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
-      Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
-
-    if (Arg.isInvalid()) {
-      if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
-        RunSignatureHelp();
-      return true;
-    }
+    if (Tok.is(tok::annot_embed)) {
+      ExpandEmbedIntoTemplateArgList(TemplateArgs);
+    } else {
+      ParsedTemplateArgument Arg = ParseTemplateArgument();
+      SourceLocation EllipsisLoc;
+      if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
+        Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
+
+      if (Arg.isInvalid()) {
+        if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
+          RunSignatureHelp();
+        return true;
+      }
 
-    // Save this template argument.
-    TemplateArgs.push_back(Arg);
+      // Save this template argument.
+      TemplateArgs.push_back(Arg);
+    }
 
     // If the next token is a comma, consume it and keep reading
     // arguments.
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 17acfca6b0112..0febfa85b93dd 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1414,6 +1414,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Expr::PackIndexingExprClass:
   case Expr::StringLiteralClass:
   case Expr::SourceLocExprClass:
+  case Expr::EmbedExprClass:
   case Expr::ConceptSpecializationExprClass:
   case Expr::RequiresExprClass:
     // These expressions can never throw.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4db8b4130c3c7..21229054c8d18 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3711,7 +3711,7 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) {
   bool ValueIsPositive =
       AllowZero ? ValueAPS.isNonNegative() : ValueAPS.isStrictlyPositive();
   if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) {
-    Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value)
+    Diag(E->getExprLoc(), diag::err_requires_positive_value)
         << toString(ValueAPS, 10) << ValueIsPositive;
     return true;
   }
@@ -7290,8 +7290,8 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
     }
   }
 
-  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
-                                               RBraceLoc);
+  InitListExpr *E =
+      new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
   E->setType(Context.VoidTy); // FIXME: just a place holder for now.
   return E;
 }
@@ -16699,6 +16699,17 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
       SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
 }
 
+ExprResult Sema::ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
+                                StringLiteral *Filename,
+                                StringLiteral *BinaryData) {
+  auto *Storage = new (Context) EmbedDataStorage;
+  Storage->Filename = Filename;
+  Storage->BinaryData = BinaryData;
+  // The node starts at element 0 and spans every element of the data.
+  return new (Context) EmbedExpr(Context, EmbedKeywordLoc, Storage, 0,
+                                 Storage->getDataElementCount());
+}
+
 static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType,
                                               const Expr *SrcExpr) {
   if (!DstType->isFunctionPointerType() ||
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index e805834c0fd38..d966dba51e2cc 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -313,6 +313,8 @@ class InitListChecker {
   InitListExpr *FullyStructuredList = nullptr;
   NoInitExpr *DummyExpr = nullptr;
   SmallVectorImpl<QualType> *AggrDeductionCandidateParamTypes = nullptr;
+  EmbedExpr *CurEmbed = nullptr; // Save current embed we're processing.
+  unsigned CurEmbedIndex = 0;
 
   NoInitExpr *getDummyInit() {
     if (!DummyExpr)
@@ -501,6 +503,42 @@ class InitListChecker {
   void CheckEmptyInitializable(const InitializedEntity &Entity,
                                SourceLocation Loc);
 
+  Expr *HandleEmbed(EmbedExpr *Embed, const InitializedEntity &Entity) {
+    Expr *Result = nullptr;
+    // Start tracking this embed unless we are already part-way through one.
+    if (!CurEmbed) {
+      CurEmbed = Embed;
+      CurEmbedIndex = 0;
+    }
+    // Anything that is not an array element references exactly one element.
+    uint64_t ElsCount = 1;
+    // Array element: take what fits. NOTE(review): getDecl() assumed non-null.
+    if (Entity.getKind() == InitializedEntity::EK_ArrayElement) {
+      ValueDecl *ArrDecl = Entity.getParent()->getDecl();
+      auto *AType = SemaRef.Context.getAsArrayType(ArrDecl->getType());
+      assert(AType && "expected array type when initializing array");
+      ElsCount = Embed->getDataElementCount();
+      if (const auto *CAType = dyn_cast<ConstantArrayType>(AType))
+        ElsCount = std::min(CAType->getSize().getZExtValue(),
+                            ElsCount - CurEmbedIndex);
+      if (ElsCount == Embed->getDataElementCount()) {
+        CurEmbed = nullptr;
+        CurEmbedIndex = 0;
+        return Embed; // Count covers the full embed: return it unsliced.
+      }
+    }
+
+    Result = new (SemaRef.Context)
+        EmbedExpr(SemaRef.Context, Embed->getLocation(), Embed->getData(),
+                  CurEmbedIndex, ElsCount);
+    CurEmbedIndex += ElsCount;
+    if (CurEmbedIndex >= Embed->getDataElementCount()) {
+      CurEmbed = nullptr; // Every element has been handed out.
+      CurEmbedIndex = 0;
+    }
+    return Result;
+  }
+
 public:
   InitListChecker(
       Sema &S, const InitializedEntity &Entity, InitListExpr *IL, QualType &T,
@@ -1473,6 +1511,9 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
       // Brace elision is never performed if the element is not an
       // assignment-expression.
       if (Seq || isa<InitListExpr>(expr)) {
+        if (auto *Embed = dyn_cast<EmbedExpr>(expr)) {
+          expr = HandleEmbed(Embed, Entity);
+        }
         if (!VerifyOnly) {
           ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr);
           if (Result.isInvalid())
@@ -1486,7 +1527,8 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
           UpdateStructuredListElement(StructuredList, StructuredIndex,
                                       getDummyInit());
         }
-        ++Index;
+        if (!CurEmbed)
+          ++Index;
         if (AggrDeductionCandidateParamTypes)
           AggrDeductionCandidateParamTypes->push_back(ElemType);
         return;
@@ -1679,6 +1721,8 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
     ++Index;
     ++StructuredIndex;
     return;
+  } else if (auto *Embed = dyn_cast<EmbedExpr>(expr)) {
+    expr = HandleEmbed(Embed, Entity);
   }
 
   ExprResult Result;
@@ -1700,14 +1744,16 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
   else {
     ResultExpr = Result.getAs<Expr>();
 
-    if (ResultExpr != expr && !VerifyOnly) {
+    if (ResultExpr != expr && !VerifyOnly && !CurEmbed) {
       // The type was promoted, update initializer list.
       // FIXME: Why are we updating the syntactic init list?
       IList->setInit(Index, ResultExpr);
     }
   }
+
   UpdateStructuredListElement(StructuredList, StructuredIndex, ResultExpr);
-  ++Index;
+  if (!CurEmbed)
+    ++Index;
   if (AggrDeductionCandidateParamTypes)
     AggrDeductionCandidateParamTypes->push_back(DeclType);
 }
@@ -1946,6 +1992,30 @@ static bool checkDestructorReference(QualType ElementType, SourceLocation Loc,
   return SemaRef.DiagnoseUseOfDecl(Destructor, Loc);
 }
 
+/// Returns true when \p ExprList is exactly one (possibly parenthesized) embed
+/// and \p InitType is an array whose element type can take the embed's type.
+static bool canInitializeArrayWithEmbedDataString(ArrayRef<Expr *> ExprList,
+                                                  QualType InitType,
+                                                  ASTContext &Context) {
+  // Only a lone initializer qualifies.
+  if (ExprList.size() != 1)
+    return false;
+  auto *Embed = dyn_cast_if_present<EmbedExpr>(ExprList[0]->IgnoreParens());
+  if (!Embed)
+    return false;
+
+  // The destination has to be an array.
+  if (!InitType->isArrayType())
+    return false;
+
+  // Accept compatible element types, or any pairing of character types.
+  QualType ElemTy = InitType->getAsArrayTypeUnsafe()->getElementType();
+  QualType EmbedTy = Embed->getType();
+  if (Context.typesAreCompatible(ElemTy, EmbedTy))
+    return true;
+  return ElemTy->isCharType() && EmbedTy->isCharType();
+}
+
 void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
                                      InitListExpr *IList, QualType &DeclType,
                                      llvm::APSInt elementIndex,
@@ -1963,6 +2033,12 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
     }
   }
 
+  if (canInitializeArrayWithEmbedDataString(IList->inits(), DeclType,
+                                            SemaRef.Context)) {
+    EmbedExpr *Embed = cast<EmbedExpr>(IList->inits()[0]);
+    IList->setInit(0, Embed->getDataStringLiteral());
+  }
+
   // Check for the special-case of initializing an array with a string.
   if (Index < IList->getNumInits()) {
     if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) ==
@@ -2065,13 +2141,24 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
     if (maxElementsKnown && elementIndex == maxElements)
       break;
 
-    InitializedEntity ElementEntity =
-      InitializedEntity::InitializeElement(SemaRef.Context, StructuredIndex,
-                                           Entity);
+    InitializedEntity ElementEntity = InitializedEntity::InitializeElement(
+        SemaRef.Context, StructuredIndex, Entity);
+
+    unsigned EmbedElementIndexBeforeInit = CurEmbedIndex;
     // Check this element.
     CheckSubElementType(ElementEntity, IList, elementType, Index,
                         StructuredList, StructuredIndex);
     ++elementIndex;
+    if ((CurEmbed || isa<EmbedExpr>(Init)) && elementType->isScalarType()) {
+      if (CurEmbed) {
+        elementIndex =
+            elementIndex + CurEmbedIndex - EmbedElementIndexBeforeInit - 1;
+      } else {
+        auto Embed = cast<EmbedExpr>(Init);
+        elementIndex = elementIndex + Embed->getDataElementCount() -
+                       EmbedElementIndexBeforeInit - 1;
+      }
+    }
 
     // If the array is of incomplete type, keep track of the number of
     // elements in the initializer.
@@ -9077,19 +9164,18 @@ ExprResult InitializationSequence::Perform(Sema &S,
           }
         }
       }
-
+      Expr *Init = CurInit.get();
       CheckedConversionKind CCK =
           Kind.isCStyleCast()       ? CheckedConversionKind::CStyleCast
           : Kind.isFunctionalCast() ? CheckedConversionKind::FunctionalCast
           : Kind.isExplicitCast()   ? CheckedConversionKind::OtherCast
                                     : CheckedConversionKind::Implicit;
-      ExprResult CurInitExprRes =
-        S.PerformImplicitConversion(CurInit.get(), Step->Type, *Step->ICS,
-                                    getAssignmentAction(Entity), CCK);
+      ExprResult CurInitExprRes = S.PerformImplicitConversion(
+          Init, Step->Type, *Step->ICS, getAssignmentAction(Entity), CCK);
       if (CurInitExprRes.isInvalid())
         return ExprError();
 
-      S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), CurInit.get());
+      S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), Init);
 
       CurInit = CurInitExprRes;
 
@@ -9244,10 +9330,11 @@ ExprResult InitializationSequence::Perform(Sema &S,
 
     case SK_CAssignment: {
       QualType SourceType = CurInit.get()->getType();
+      Expr *Init = CurInit.get();
 
       // Save off the initial CurInit in case we need to emit a diagnostic
-      ExprResult InitialCurInit = CurInit;
-      ExprResult Result = CurInit;
+      ExprResult InitialCurInit = Init;
+      ExprResult Result = Init;
       Sema::AssignConvertType ConvTy =
         S.CheckSingleAssignmentConstraints(Step->Type, Result, true,
             Entity.getKind() == InitializedEntity::EK_Parameter_CF_Audited);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 3bfda09d5f80f..f117fe98d142b 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12939,6 +12939,11 @@ ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
                                            getSema().CurContext);
 }
 
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformEmbedExpr(EmbedExpr *E) {
+  return E; // No rebuilding is done; the original node is handed back as-is.
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 67ef170251914..a0ffe24e1f91e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1323,6 +1323,17 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) {
   E->SourceLocExprBits.Kind = Record.readInt();
 }
 
+void ASTStmtReader::VisitEmbedExpr(EmbedExpr *E) {
+  VisitExpr(E);
+  E->EmbedKeywordLoc = readSourceLocation(); // Exactly one location is read.
+  EmbedDataStorage *Data = new (Record.getContext()) EmbedDataStorage;
+  Data->Filename = cast<StringLiteral>(Record.readSubStmt());
+  Data->BinaryData = cast<StringLiteral>(Record.readSubStmt());
+  E->Data = Data;
+  E->Begin = Record.readInt();         // First of the two trailing integers.
+  E->NumOfElements = Record.readInt(); // Second of the two trailing integers.
+}
+
 void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   E->setAmpAmpLoc(readSourceLocation());
@@ -3233,6 +3244,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = new (Context) SourceLocExpr(Empty);
       break;
 
+    case EXPR_BUILTIN_PP_EMBED:
+      S = new (Context) EmbedExpr(Empty);
+      break;
+
     case EXPR_ADDR_LABEL:
       S = new (Context) AddrLabelExpr(Empty);
       break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 1ba6d5501fd10..546af09d41a35 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1262,6 +1262,17 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) {
   Code = serialization::EXPR_SOURCE_LOC;
 }
 
+void ASTStmtWriter::VisitEmbedExpr(EmbedExpr *E) {
+  VisitExpr(E);
+  // Emit exactly one location: the reader restores only EmbedKeywordLoc.
+  Record.AddSourceLocation(E->getBeginLoc());
+  Record.AddStmt(E->getFilenameStringLiteral());
+  Record.AddStmt(E->getDataStringLiteral());
+  Record.writeUInt32(E->getStartingElementPos());
+  Record.writeUInt32(E->getDataElementCount());
+  Code = serialization::EXPR_BUILTIN_PP_EMBED;
+}
+
 void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
   VisitExpr(E);
   Record.AddSourceLocation(E->getAmpAmpLoc());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 197d673107285..b331be8f56640 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2422,6 +2422,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       Bldr.addNodes(Dst);
       break;
     }
+
+    case Stmt::EmbedExprClass:
+      llvm_unreachable("Support for EmbedExpr is not implemented.");
+      break;
   }
 }
 
diff --git a/clang/test/C/C2x/Inputs/bits.bin b/clang/test/C/C2x/Inputs/bits.bin
new file mode 100644
index 0000000000000..ad471007bd7f5
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/bits.bin
@@ -0,0 +1 @@
+0123456789
\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/boop.h b/clang/test/C/C2x/Inputs/boop.h
new file mode 100644
index 0000000000000..d3e39674f1962
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/boop.h
@@ -0,0 +1 @@
+*boop*
\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/i.dat b/clang/test/C/C2x/Inputs/i.dat
new file mode 100644
index 0000000000000..c227083464fb9
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/i.dat
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/jump.wav b/clang/test/C/C2x/Inputs/jump.wav
new file mode 100644
index 0000000000000..a71100636e867
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/jump.wav
@@ -0,0 +1 @@
+RIFF
\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/s.dat b/clang/test/C/C2x/Inputs/s.dat
new file mode 100644
index 0000000000000..3a332e6bba38d
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/s.dat
@@ -0,0 +1 @@
+012345678
\ No newline at end of file
diff --git a/clang/test/C/C2x/n3017.c b/clang/test/C/C2x/n3017.c
new file mode 100644
index 0000000000000..0d22d31baa4b7
--- /dev/null
+++ b/clang/test/C/C2x/n3017.c
@@ -0,0 +1,216 @@
+// RUN: %clang_cc1 -verify -fsyntax-only --embed-dir=%S/Inputs -std=c2x %s -Wno-constant-logical-operand
+
+/* WG14 N3017: full
+ * #embed - a scannable, tooling-friendly binary resource inclusion mechanism
+ */
+
+// C23 6.10p6
+char b1[] = {
+#embed "boop.h" limit(5)
+,
+#embed "boop.h" __limit__(5)
+};
+
+// C23 6.10.1p19
+#if __has_embed(__FILE__ ext::token(0xB055))
+#error "Supports an extension parameter Clang never claimed to support?"
+#endif
+
+#if !__has_embed(__FILE__ clang::offset(0))
+#error "Doesn't support an extension Clang claims to support?"
+#endif
+
+// C23 6.10.1p20
+void parse_into_s(short* ptr, unsigned char* ptr_bytes, unsigned long long size);
+int f() {
+#if __has_embed ("bits.bin" ds9000::element_type(short))
+  /* Implementation extension: create short integers from the */
+  /* translation environment resource into */
+  /* a sequence of integer constants */
+  short meow[] = {
+#embed "bits.bin" ds9000::element_type(short)
+  };
+#elif __has_embed ("bits.bin")
+  /* no support for implementation-specific */
+  /* ds9000::element_type(short) parameter */
+  unsigned char meow_bytes[] = {
+  #embed "bits.bin"
+  };
+  short meow[sizeof(meow_bytes) / sizeof(short)] = {};
+  /* parse meow_bytes into short values by-hand! */
+  parse_into_s(meow, meow_bytes, sizeof(meow_bytes));
+#else
+#error "cannot find bits.bin resource"
+#endif
+  return (int)(meow[0] + meow[(sizeof(meow) / sizeof(*meow)) - 1]);
+}
+
+// NOTE: we don't have a good way to test infinite resources from within lit.
+int g() {
+#if __has_embed(<infinite-resource> limit(0)) == 2
+  // if <infinite-resource> exists, this
+  // token sequence is always taken.
+  return 0;
+#else
+  // the 'infinite-resource' resource does not exist
+  #error "The resource does not exist"
+#endif
+  // expected-error at -2 {{"The resource does not exist"}}
+}
+
+#include <stddef.h>
+void have_you_any_wool(const unsigned char*, size_t);
+int h() {
+  static const unsigned char baa_baa[] = {
+#embed __FILE__
+  };
+  have_you_any_wool(baa_baa, sizeof(baa_baa));
+  return 0;
+}
+
+// C23 6.10.3.1p17: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.1p18
+int i() {
+/* Braces may be kept or elided as per normal initialization rules */
+  int i = {
+#embed "i.dat"
+  }; /* valid if i.dat produces 1 value,
+        i value is [0, 2^(embed element width)) */
+  int i2 =
+#embed "i.dat"
+  ; /* valid if i.dat produces 1 value,
+       i2 value is [0, 2^(embed element width)) */
+  struct s {
+    double a, b, c;
+    struct { double e, f, g; };
+    double h, i, j;
+  };
+  struct s x = {
+    /* initializes each element in order according to initialization
+    rules with comma-separated list of integer constant expressions
+    inside of braces */
+    #embed "s.dat"
+  };
+  return 0;
+}
+
+// C23 6.10.3.1p19: not tested here because it's a runtime test rather than one
+// which can be handled at compile time (it validates file contents via fread).
+
+// C23 6.10.3.2p5
+int j() {
+  static const char sound_signature[] = {
+#embed <jump.wav> limit(2+2)
+  };
+  static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+    "There should only be 4 elements in this array.");
+  // verify PCM WAV resource
+  static_assert(sound_signature[0] == 'R');
+  static_assert(sound_signature[1] == 'I');
+  static_assert(sound_signature[2] == 'F');
+  static_assert(sound_signature[3] == 'F');
+  static_assert(sizeof(sound_signature) == 4);
+  return 0;
+}
+
+// C23 6.10.3p6
+int k() {
+#define TWO_PLUS_TWO 2+2
+  static const char sound_signature[] = {
+#embed <jump.wav> limit(TWO_PLUS_TWO)
+  };
+  static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+    "There should only be 4 elements in this array.");
+  // verify PCM WAV resource
+  static_assert(sound_signature[0] == 'R');
+  static_assert(sound_signature[1] == 'I');
+  static_assert(sound_signature[2] == 'F');
+  static_assert(sound_signature[3] == 'F');
+  static_assert(sizeof(sound_signature) == 4);
+  return 0;
+}
+
+// C23 6.10.3.2p7: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.2p8: not tested here because it requires access to an infinite
+// resource like /dev/urandom.
+
+// C23 6.10.3.3p4
+char *strcpy(char *, const char *);
+#ifndef SHADER_TARGET
+  #define SHADER_TARGET "bits.bin"
+#endif
+extern char* null_term_shader_data;
+void fill_in_data () {
+  const char internal_data[] = {
+#embed SHADER_TARGET \
+  suffix(,)
+  0
+  };
+  strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.4p4
+#ifndef SHADER_TARGET
+#define SHADER_TARGET "bits.bin"
+#endif
+extern char* merp;
+void init_data () {
+  const char whl[] = {
+#embed SHADER_TARGET \
+    prefix(0xEF, 0xBB, 0xBF, ) /* UTF-8 BOM */ \
+    suffix(,)
+    0
+  };
+  // always null terminated,
+  // contains BOM if not-empty
+  const int is_good = (sizeof(whl) == 1 && whl[0] == '\0')
+    || (whl[0] == '\xEF' && whl[1] == '\xBB'
+    && whl[2] == '\xBF' && whl[sizeof(whl) - 1] == '\0');
+  static_assert(is_good);
+  strcpy(merp, whl);
+}
+
+// C23 6.10.3.5p3
+int l() {
+  return
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+  ;
+  // becomes:
+  // return 0;
+
+  // Validating the assumption from the example in the standard.
+  static_assert(
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+    == 0);
+}
+
+// C23 6.10.3.5p4
+void fill_in_data_again() {
+  const char internal_data[] = {
+#embed SHADER_TARGET \
+  suffix(, 0) \
+  if_empty(0)
+  };
+  strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.5p5
+int m() {
+  return
+#embed __FILE__ limit(0) if_empty(45540)
+  ;
+
+  // Validating the assumption from the example in the standard.
+  static_assert(
+#embed __FILE__ limit(0) if_empty(45540)
+    == 45540);
+}
+
+// 6.10.9.1p1
+static_assert(__STDC_EMBED_NOT_FOUND__ == 0);
+static_assert(__STDC_EMBED_FOUND__ == 1);
+static_assert(__STDC_EMBED_EMPTY__ == 2);
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 0000000000000..93d177a48c83a
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk
\ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 0000000000000..1ce9ab967e4a1
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+           __  _
+       .-.'  `; `-._  __  _
+      (_,         .-:'  `; `-._
+    ,'o"(        (_,           )
+   (__,-'      ,'o"(            )>
+      (       (__,-'            )
+       `-'._.--._(             )
+          |||  |||`-'._.--._.-'
+                     |||  |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Preprocessor/Inputs/null_byte.bin b/clang/test/Preprocessor/Inputs/null_byte.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
GIT binary patch
literal 1
IcmZPo000310RR91

literal 0
HcmV?d00001

diff --git a/clang/test/Preprocessor/Inputs/numbers.txt b/clang/test/Preprocessor/Inputs/numbers.txt
new file mode 100644
index 0000000000000..11f11f9be3bab
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/numbers.txt
@@ -0,0 +1 @@
+0123456789
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 0000000000000..63d8dbd40c235
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b
\ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 0000000000000..43a3068b5f53a
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed("media/art.txt") != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk") != __STDC_EMBED_NOT_FOUND__
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) != __STDC_EMBED_NOT_FOUND__
+#error 6
+#elif __has_embed(__FILE__ limit(2) prefix(y)) != __STDC_EMBED_FOUND__
+#error 7
+#elif __has_embed(__FILE__ limit(2)) != __STDC_EMBED_FOUND__
+#error 8
+// 6.10.1p7, if the search fails or any of the embed parameters in the embed
+// parameter sequence specified are not supported by the implementation for the
+// #embed directive;
+// We don't support one of the embed parameters.
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) != __STDC_EMBED_NOT_FOUND__
+#error 9
+#elif __has_embed(<media/empty>) != __STDC_EMBED_EMPTY__
+#error 10
+// 6.10.1p7: if the search for the resource succeeds and all embed parameters
+// in the embed parameter sequence specified are supported by the
+// implementation for the #embed directive and the resource is empty
+// Limiting to zero characters means the resource is empty.
+#elif __has_embed(<media/empty> limit(0)) != __STDC_EMBED_EMPTY__
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error 12
+// Test that an offset past the end of the file produces an empty file.
+#elif __has_embed(<single_byte.txt> clang::offset(1)) != __STDC_EMBED_EMPTY__
+#error 13
+// Test that we apply the offset before we apply the limit. If we did this in
+// the reverse order, this would cause the file to be empty because we would
+// have limited it to 1 byte and then offset past it.
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(12)) != __STDC_EMBED_FOUND__
+#error 14
+#elif __has_embed(<media/art.txt>) != __STDC_EMBED_FOUND__
+#error 15
+#elif __has_embed(<media/art.txt> if_empty(meow)) != __STDC_EMBED_FOUND__
+#error 16
+#endif
+
+// Ensure that when __has_embed returns true, the file can actually be
+// embedded. This was previously failing because the way in which __has_embed
+// would search for files was different from how #embed would resolve them
+// when the file path included relative path markers like `./` or `../`.
+#if __has_embed("./embed___has_embed.c") == __STDC_EMBED_FOUND__
+unsigned char buffer[] = {
+#embed "./embed___has_embed.c"
+};
+#else
+#error 17
+#endif
diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
new file mode 100644
index 0000000000000..fcaf693fe0ff2
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
@@ -0,0 +1,240 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for __has_embed and all of its parameters to ensure we
+// recover from failures gracefully.
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed
+#endif
+
+// expected-error@+3 {{expected '>'}} \
+   expected-note@+3 {{to match this '<'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed(<)
+#endif
+
+// expected-error@+3 {{expected "FILENAME" or <FILENAME>}} \
+   expected-warning@+3 {{missing terminating '"' character}} \
+   expected-error@+3 {{invalid token at start of a preprocessor expression}}
+#if __has_embed(")
+#endif
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+   expected-error@+2 {{token is not a valid binary operator in a preprocessor subexpression}}
+#if __has_embed file.txt
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::)
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx::xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within limit() and the second one is because the #if does
+// not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" limit()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within clang::offset() and the second one is because the
+// #if does not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset 42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix(xxx)
+#endif
+
+#if __has_embed("media/empty" prefix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" prefix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{}]}))
+#endif
+#if __has_embed("media/empty" prefix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix(xxx)
+#endif
+
+#if __has_embed("media/empty" suffix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" suffix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{}]}))
+#endif
+#if __has_embed("media/empty" suffix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix))
+#endif
+
+#if __has_embed("media/art.txt" if_empty(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/art.txt" if_empty(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{}]}))
+#endif
+#if __has_embed("media/art.txt" if_empty()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty))
+#endif
+
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 0000000000000..e51dbb870372b
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed(__FILE__ suffix(x)) != __STDC_EMBED_FOUND__
+#error 3
+#elif __has_embed(__FILE__ suffix(x) limit(1)) != __STDC_EMBED_FOUND__
+#error 4
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1)) != __STDC_EMBED_FOUND__
+#error 5
+#elif __has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 6
+#elif __has_embed(__FILE__ suffix(x) limit(0) prefix(1)) != __STDC_EMBED_EMPTY__
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != __STDC_EMBED_EMPTY__
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != __STDC_EMBED_EMPTY__
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 0000000000000..a664715091319
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data) == 274);
+static_assert(' ' == data[0]);
+static_assert('_' == data[11]);
+static_assert('\n' == data[273]);
+static_assert(sizeof(data2) == 275);
+static_assert(' ' == data2[0]);
+static_assert('_' == data2[11]);
+static_assert('\n' == data2[273]);
+static_assert('\0' == data2[274]);
+static_assert(sizeof(data3) == 275);
+static_assert(' ' == data3[0]);
+static_assert('_' == data3[11]);
+static_assert('\n' == data3[273]);
+static_assert('\0' == data3[274]);
+static_assert(sizeof(data4) == 275);
+static_assert(' ' == data4[0]);
+static_assert('_' == data4[11]);
+static_assert('\n' == data4[273]);
+static_assert('\0' == data4[274]);
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data5) == 274);
+static_assert(' ' == data5[0]);
+static_assert('_' == data5[11]);
+static_assert('\n' == data5[273]);
+static_assert(sizeof(data6) == 275);
+static_assert(' ' == data6[0]);
+static_assert('_' == data6[11]);
+static_assert('\n' == data6[273]);
+static_assert('\0' == data6[274]);
+static_assert(sizeof(data7) == 275);
+static_assert(' ' == data7[0]);
+static_assert('_' == data7[11]);
+static_assert('\n' == data7[273]);
+static_assert('\0' == data7[274]);
+static_assert(sizeof(data8) == 275);
+static_assert(' ' == data8[0]);
+static_assert('_' == data8[11]);
+static_assert('\n' == data8[273]);
+static_assert('\0' == data8[274]);
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+static_assert(sizeof(data9) == 274);
+static_assert(' ' == data9[0]);
+static_assert('_' == data9[11]);
+static_assert('\n' == data9[273]);
+static_assert(sizeof(data10) == 275);
+static_assert(' ' == data10[1]);
+static_assert('_' == data10[12]);
+static_assert('\n' == data10[274]);
+static_assert('\0' == data10[0]);
+static_assert(sizeof(data11) == 275);
+static_assert(' ' == data11[1]);
+static_assert('_' == data11[12]);
+static_assert('\n' == data11[274]);
+static_assert('\0' == data11[0]);
+static_assert(sizeof(data12) == 275);
+static_assert(' ' == data12[1]);
+static_assert('_' == data12[12]);
+static_assert('\n' == data12[274]);
+static_assert('\0' == data12[0]);
diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp
new file mode 100644
index 0000000000000..64110afc162d7
--- /dev/null
+++ b/clang/test/Preprocessor/embed_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 %s -triple x86_64 --embed-dir=%S/Inputs -emit-llvm -o - | FileCheck %s
+
+// CHECK: @__const._Z3fooi.ca = private unnamed_addr constant [3 x i32] [i32 0, i32 106, i32 107], align 4
+// CHECK: @__const._Z3fooi.sc = private unnamed_addr constant %struct.S1 { i32 106, i32 107, i32 0 }, align 4
+// CHECK: @__const._Z3fooi.t = private unnamed_addr constant [3 x %struct.T] [%struct.T { [2 x i32] [i32 48, i32 49], %struct.S1 { i32 50, i32 51, i32 52 } }, %struct.T { [2 x i32] [i32 53, i32 54], %struct.S1 { i32 55, i32 56, i32 57 } }, %struct.T { [2 x i32] [i32 10, i32 0], %struct.S1 zeroinitializer }], align 16
+void foo(int a) {
+// CHECK: %a.addr = alloca i32, align 4
+// CHECK: store i32 %a, ptr %a.addr, align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %ca, ptr align 4 @__const._Z3fooi.ca, i64 12, i1 false)
+int ca[] = {
+0
+#embed <jk.txt> prefix(,)
+};
+
+// CHECK: %arrayinit.element = getelementptr inbounds i32, ptr %notca, i64 1
+// CHECK: store i8 106, ptr %arrayinit.element, align 4
+// CHECK: %arrayinit.element1 = getelementptr inbounds i32, ptr %notca, i64 2
+// CHECK: store i8 107, ptr %arrayinit.element1, align 4
+int notca[] = {
+a
+#embed <jk.txt> prefix(,)
+};
+
+struct S1 {
+  int x, y, z;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %sc, ptr align 4 @__const._Z3fooi.sc, i64 12, i1 false)
+S1 sc = {
+#embed <jk.txt> suffix(,)
+0
+};
+
+// CHECK: %x = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 0
+// CHECK: store i32 106, ptr %x, align 4
+// CHECK: %y = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 1
+// CHECK: store i32 107, ptr %y, align 4
+// CHECK: %z = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 2
+// CHECK: %1 = load i32, ptr %a.addr, align 4
+S1 s = {
+#embed <jk.txt> suffix(,)
+a
+};
+
+// CHECK: store i32 107, ptr %b, align 4
+int b =
+#embed<jk.txt>
+;
+
+
+struct T {
+  int arr[2];
+  struct S1 s;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %t, ptr align 16 @__const._Z3fooi.t, i64 60, i1 false)
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+
+// CHECK:  %arr = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 0
+// CHECK:  %2 = load i32, ptr %a.addr, align 4
+// CHECK:  store i32 %2, ptr %arr, align 4
+// CHECK:  %arrayinit.element2 = getelementptr inbounds i32, ptr %arr, i64 1
+// CHECK:  store i32 300, ptr %arrayinit.element2, align 4
+// CHECK:  %s3 = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 1
+// CHECK:  %x4 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 0
+// CHECK:  store i32 1, ptr %x4, align 4
+// CHECK:  %y5 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 1
+// CHECK:  store i32 2, ptr %y5, align 4
+// CHECK:  %z6 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 2
+// CHECK:  store i32 3, ptr %z6, align 4
+// CHECK:  %arrayinit.element7 = getelementptr inbounds %struct.T, ptr %tnonc, i64 1
+// CHECK:  call void @llvm.memset.p0.i64(ptr align 4 %arrayinit.element7, i8 0, i64 20, i1 false)
+// CHECK:  %arr8 = getelementptr inbounds %struct.T, ptr %arrayinit.element7, i32 0, i32 0
+// CHECK:  store i8 106, ptr %arr8, align 4
+// CHECK:  %arrayinit.element9 = getelementptr inbounds i32, ptr %arr8, i64 1
+// CHECK:  store i8 107, ptr %arrayinit.element9, align 4
+struct T tnonc[] = {
+  a, 300, 1, 2, 3
+#embed <jk.txt> prefix(,)
+};
+
+}
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
new file mode 100644
index 0000000000000..1cadff76b4890
--- /dev/null
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions
+
+constexpr int value(int a, int b) {
+  return a + b;
+}
+
+constexpr int func_call() {
+  return value(
+#embed <jk.txt>
+  );
+}
+
+constexpr int init_list_expr() {
+  int vals[] = {
+#embed <jk.txt>
+  };
+  return value(vals[0], vals[1]);
+}
+
+template <int N, int M>
+struct Hurr {
+  static constexpr int V1 = N;
+  static constexpr int V2 = M;
+};
+
+constexpr int template_args() {
+  Hurr<
+#embed <jk.txt>
+  > H;
+  return value(H.V1, H.V2);
+}
+
+constexpr int ExpectedValue = 'j' + 'k';
+static_assert(func_call() == ExpectedValue);
+static_assert(init_list_expr() == ExpectedValue);
+static_assert(template_args() == ExpectedValue);
+
+static_assert(
+#embed <jk.txt> limit(1) suffix(== 'j')
+);
+
+int array[
+#embed <jk.txt> limit(1)
+];
+static_assert(sizeof(array) / sizeof(int) == 'j');
+
+constexpr int comma_expr = (
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+);
+static_assert(comma_expr == 'k');
+
+constexpr int comma_expr_init_list{ (
+#embed <jk.txt> limit(1)
+) };
+static_assert(comma_expr_init_list == 'j');
+
+constexpr int paren_init(
+#embed <jk.txt> limit(1)
+);
+static_assert(paren_init == 'j');
+
+struct S {
+  const char buffer[2] = {
+#embed "jk.txt"
+  };
+};
+
+constexpr struct S s;
+static_assert(s.buffer[1] == 'k');
+
+struct S1 {
+  int x, y;
+};
+
+struct T {
+  int x, y;
+  struct S1 s;
+};
+
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+static_assert(t[0].s.x == '2');
+
+constexpr int func(int i, int) { return i; }
+static_assert(
+  func(
+#embed <jk.txt>
+  ) == 'j');
+
+template <int N>
+struct ST {};
+
+ST<
+#embed <jk.txt> limit(1)
+> st;
diff --git a/clang/test/Preprocessor/embed_dependencies.c b/clang/test/Preprocessor/embed_dependencies.c
new file mode 100644
index 0000000000000..4e00dc79ac190
--- /dev/null
+++ b/clang/test/Preprocessor/embed_dependencies.c
@@ -0,0 +1,20 @@
+// RUN: %clang %s -fsyntax-only -std=c23 -M --embed-dir=%S/Inputs -Xclang -verify | FileCheck %s
+
+// Yes, this looks very strange indeed, but the goal is to test that we add
+// files referenced by both __has_embed and #embed when we generate
+// dependencies, so we're trying to see that both of these files are in the
+// output.
+#if __has_embed(<jk.txt>)
+const char data =
+#embed "Inputs/single_byte.txt"
+;
+_Static_assert('b' == data);
+#else
+#error "oops"
+#endif
+// expected-no-diagnostics
+
+// CHECK: embed_dependencies.c \
+// CHECK-NEXT: jk.txt \
+// CHECK-NEXT: Inputs{{[/\\]}}single_byte.txt
+
diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c
new file mode 100644
index 0000000000000..74f24176d9cca
--- /dev/null
+++ b/clang/test/Preprocessor/embed_ext_compat_diags.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=none -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
+// RUN: %clang_cc1 -std=c17 %s -fsyntax-only --embed-dir=%S/Inputs -verify=ext -pedantic
+// RUN: %clang_cc1 -x c++ %s -fsyntax-only --embed-dir=%S/Inputs -verify=cxx -pedantic
+// none-no-diagnostics
+
+#if __has_embed("jk.txt")
+
+const char buffer[] = {
+#embed "jk.txt" /* compat-warning {{#embed is incompatible with C standards before C23}}
+                   ext-warning {{#embed is a C23 extension}}
+                   cxx-warning {{#embed is a Clang extension}}
+                 */
+};
+#endif
+
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 0000000000000..2648804132599
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+// expected-no-diagnostics
+
+#if !defined(__has_embed)
+#error 1
+#endif
diff --git a/clang/test/Preprocessor/embed_file_not_found_chevron.c b/clang/test/Preprocessor/embed_file_not_found_chevron.c
new file mode 100644
index 0000000000000..472222aafa55a
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_chevron.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed <nfejfNejAKFe>
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_file_not_found_quote.c b/clang/test/Preprocessor/embed_file_not_found_quote.c
new file mode 100644
index 0000000000000..bf9c62b55c99e
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_quote.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed "nfejfNejAKFe"
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 0000000000000..79b1743703ac5
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
+// expected-no-diagnostics
+
+typedef struct kitty {
+	int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+	int here;
+	kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+static_assert(meow == 'b');
+static_assert(kit.purr == 'b');
+static_assert(kit_kit.here == 'j');
+static_assert(kit_kit.kit.purr == 'k');
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 0000000000000..70f1bc6a28be1
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+static_assert(sizeof(data) == 3);
+static_assert(123 == data[0]);
+static_assert(124 == data[1]);
+static_assert(125 == data[2]);
+static_assert(sizeof(non_empty_data) == 2);
+static_assert('j' == non_empty_data[0]);
+static_assert('k' == non_empty_data[1]);
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> if_empty(1) prefix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> if_empty(1) suffix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 0000000000000..da3e4fb877c1b
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> limit(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('j' == offset_data[0]);
+static_assert(offset_data[0] == data[0]);
+
+// Cannot have a negative limit.
+#embed <jk.txt> limit(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// It can have a limit of 0, in which case __has_embed should report that the resource is empty.
+#if __has_embed(<jk.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the limit is zero, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> limit(0) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+  1,
+#embed <jk.txt> limit(0) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can limit to something larger than the file size as well.
+const unsigned char third_buffer[] = {
+#embed <jk.txt> limit(100)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the limits of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> limit(10)
+};
+static_assert(sizeof(fourth_buffer) == 10);
+static_assert(' ' == fourth_buffer[0]);
+static_assert(' ' == fourth_buffer[1]);
+static_assert(' ' == fourth_buffer[2]);
+static_assert(' ' == fourth_buffer[3]);
+static_assert(' ' == fourth_buffer[4]);
+static_assert(' ' == fourth_buffer[5]);
+static_assert(' ' == fourth_buffer[6]);
+static_assert(' ' == fourth_buffer[7]);
+static_assert(' ' == fourth_buffer[8]);
+static_assert(' ' == fourth_buffer[9]);
+
+// Ensure that a limit larger than what can fit into a 64-bit value is
+// rejected. This limit is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 2);
+static_assert('j' == fifth_buffer[0]);
+static_assert('k' == fifth_buffer[1]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+// Note: the preprocessor will continue with the truncated value, so the parser
+// will treat this case and the previous one identically in terms of what
+// contents are retained from the embedded resource (which is the entire file).
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> limit(1) prefix() limit(1)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> limit(1) if_empty() limit(2)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+};
+
+// C23 6.10.3.2p2
+static_assert(
+#embed <jk.txt> limit(defined(FOO)) // expected-error {{'defined' cannot appear within this context}}
+  == 0); // expected-error {{expected expression}}
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 0000000000000..ab1bd3f9f034e
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('k' == offset_data[0]);
+static_assert(offset_data[0] == data[1]);
+
+// Cannot have a negative offset.
+#embed <jk.txt> clang::offset(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// If the offset is past the end of the file, the file should be considered
+// empty.
+#if __has_embed(<jk.txt> clang::offset(3)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the offset is past the end of the file, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> clang::offset(3) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+  1,
+#embed <jk.txt> clang::offset(3) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can offset to zero (that's the default behavior)
+const unsigned char third_buffer[] = {
+#embed <jk.txt> clang::offset(0)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the offsets of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> clang::offset(24) limit(4)
+};
+static_assert(sizeof(fourth_buffer) == 4);
+static_assert('.' == fourth_buffer[0]);
+static_assert('-' == fourth_buffer[1]);
+static_assert('.' == fourth_buffer[2]);
+static_assert('\'' == fourth_buffer[3]);
+
+// Ensure that an offset larger than what can fit into a 64-bit value is
+// rejected. This offset is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+  1,
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 1);
+static_assert(1 == fifth_buffer[0]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> clang::offset(1) prefix() clang::offset(1)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> clang::offset(1) if_empty() clang::offset(2)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+};
+
+// Matches C23 6.10.3.2p2 and is documented as part of our extension.
+static_assert(
+#embed <jk.txt> clang::offset(defined(FOO))
+  == 0); // expected-error {{expected expression}}
+ /* expected-error@-2 {{'defined' cannot appear within this context}}
+    pedantic-warning@-2 {{'clang::offset' is a Clang extension}}
+  */
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 0000000000000..b55c08f013955
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('\xA' == data[0]);
+static_assert('b' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> prefix( .x = 100, .y = 10, )
+};
+static_assert(s.x == 100);
+static_assert(s.y == 10);
+static_assert(s.z == 'b');
+
+// Ensure that an empty file does not produce any prefix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> prefix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> prefix(1,) limit(1) prefix(1,)
+// expected-error at -1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> prefix(1,) if_empty() prefix(2,)
+// expected-error at -1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 0000000000000..7d76826828245
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('b' == data[0]);
+static_assert('\xA' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> suffix( , .y = 100, .z = 10 )
+};
+
+static_assert(s.x == 'b');
+static_assert(s.y == 100);
+static_assert(s.z == 10);
+
+// Ensure that an empty file does not produce any suffix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> suffix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> suffix(,1) prefix() suffix(,1)
+// expected-error at -1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> suffix(,1) if_empty() suffix(,2)
+// expected-error at -1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 0000000000000..b03384341a00a
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -std=c23 -E -verify
+// okay-no-diagnostics
+
+#embed __FILE__ unrecognized
+// expected-error at -1 {{unknown embed preprocessor parameter 'unrecognized'}}
+#embed __FILE__ unrecognized::param
+// expected-error at -1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-error at -1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
diff --git a/clang/test/Preprocessor/embed_parsing_errors.c b/clang/test/Preprocessor/embed_parsing_errors.c
new file mode 100644
index 0000000000000..490ec6d4ded2c
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parsing_errors.c
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for #embed and all of its parameters to ensure we
+// recover from failures gracefully.
+char buffer[] = {
+#embed
+// expected-error at -1 {{expected "FILENAME" or <FILENAME>}}
+
+#embed <
+// expected-error at -1 {{expected '>'}} \
+   expected-note at -1 {{to match this '<'}}
+
+#embed "
+// expected-error at -1 {{expected "FILENAME" or <FILENAME>}} \
+   expected-warning at -1 {{missing terminating '"' character}}
+
+#embed file.txt
+// expected-error at -1{{expected "FILENAME" or <FILENAME>}}
+
+#embed "embed_parsing_errors.c" xxx
+// expected-error at -1 {{unknown embed preprocessor parameter 'xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::
+// expected-error at -1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" xxx::xxx
+// expected-error at -1 {{unknown embed preprocessor parameter 'xxx::xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::42
+// expected-error at -1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" limit
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" limit(
+// expected-error at -1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" limit(xxx
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit(42
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit([
+// expected-error at -1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit([)
+// expected-error at -1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit(1/0)
+// expected-error at -1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" clang::offset(
+// expected-error at -1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(xxx
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset(42
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset([
+// expected-error at -1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset([)
+// expected-error at -1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(1/0)
+// expected-error at -1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset 42
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix(
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(xxx
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" prefix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" prefix(([{)]})
+// expected-error at -1 {{expected '}'}} expected-note at -1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" prefix(([{})})
+// expected-error at -1 {{expected ']'}} expected-note at -1 {{to match this '['}}
+#embed "embed_parsing_errors.c" prefix(([{}]})
+// expected-error at -1 {{expected ')'}} expected-note at -1 {{to match this '('}}
+#embed "embed_parsing_errors.c" prefix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" prefix)
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix(
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(xxx
+// expected-error at -1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" suffix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" suffix(([{)]})
+// expected-error at -1 {{expected '}'}} expected-note at -1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" suffix(([{})})
+// expected-error at -1 {{expected ']'}} expected-note at -1 {{to match this '['}}
+#embed "embed_parsing_errors.c" suffix(([{}]})
+// expected-error at -1 {{expected ')'}} expected-note at -1 {{to match this '('}}
+#embed "embed_parsing_errors.c" suffix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" suffix)
+// expected-error at -1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" if_empty(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" if_empty(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" if_empty(([{)]})
+// expected-error at -1 {{expected '}'}} expected-note at -1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" if_empty(([{})})
+// expected-error at -1 {{expected ']'}} expected-note at -1 {{to match this '['}}
+#embed "embed_parsing_errors.c" if_empty(([{}]})
+// expected-error at -1 {{expected ')'}} expected-note at -1 {{to match this '('}}
+#embed "embed_parsing_errors.c" if_empty() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" if_empty)
+// expected-error at -1 {{expected '('}}
+};
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 0000000000000..b12cb9ceb54b8
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -std=c23 -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+static_assert(sizeof(data) == 1);
+static_assert('b' == data[0]);
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 0000000000000..79ca1e5c811b8
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+static_assert(sizeof(data) == 1);
+static_assert('a' == data[0]);
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 0000000000000..9895d958cf96d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
+// RUN: %clang_cc1 -std=c23 %s -E -dE --embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
+
+// Ensure that we correctly preprocess to a file, both with expanding embed
+// directives fully and with printing the directive instead.
+const char data[] = {
+#embed <jk.txt> if_empty('a', 'b') clang::offset(0) limit(1) suffix(, 'a', 0) prefix('h',)
+};
+
+// EXPANDED: const char data[] = {'h',106 , 'a', 0};
+// DIRECTIVE: const char data[] = {
+// DIRECTIVE-NEXT: #embed <jk.txt> if_empty('a', 'b') limit(1) clang::offset(0) prefix('h',) suffix(, 'a', 0) /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char more[] = {
+#embed <media/empty> if_empty('a', 'b')
+};
+
+// EXPANDED: const char more[] = {'a', 'b'}
+// DIRECTIVE: const char more[] = {
+// DIRECTIVE-NEXT: #embed <media/empty> if_empty('a', 'b') /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char even_more[] = {
+  1, 2, 3,
+#embed <jk.txt> prefix(4, 5,) suffix(, 6, 7)
+  , 8, 9, 10
+};
+
+// EXPANDED: const char even_more[] = {
+// EXPANDED-NEXT:   1, 2, 3,4, 5,106, 107 , 6, 7 , 8, 9, 10
+// EXPANDED-EMPTY:
+// EXPANDED-EMPTY:
+// EXPANDED-NEXT: };
+// DIRECTIVE: const char even_more[] = {
+// DIRECTIVE-NEXT:  1, 2, 3,
+// DIRECTIVE-NEXT: #embed <jk.txt> prefix(4, 5,) suffix(, 6, 7) /* clang -E -dE */
+// DIRECTIVE-NEXT:  , 8, 9, 10
+// DIRECTIVE-NEXT: };
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 0000000000000..2019118b48d32
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify
+
+const char data =
+#embed <single_byte.txt>
+;
+_Static_assert('b' == data);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 0000000000000..a31b0836b0311
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,cxx -Wno-c23-extensions
+// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,c
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;}
+void g () {}
+void h (unsigned char x, int y) {(void)x; (void)y;}
+int i () {
+	return
+#embed <single_byte.txt>
+		;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus
+template <int First, int Second>
+void j() {
+	static_assert(First == 'j', "");
+	static_assert(Second == 'k', "");
+}
+#endif
+
+void do_stuff() {
+	f(
+#embed <single_byte.txt>
+	);
+	g(
+#embed <media/empty>
+	);
+	h(
+#embed <jk.txt>
+	);
+	int r = i();
+	(void)r;
+#ifdef __cplusplus
+	j<
+#embed <jk.txt>
+	>(
+#embed <media/empty>
+	);
+#endif
+}
+
+// Ensure that we don't accidentally allow you to initialize an unsigned char *
+// from embedded data; the data is modeled as a string literal internally, but
+// is not actually a string literal.
+const unsigned char *ptr =
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+; // c-error at -2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \
+     cxx-error at -2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}}
+
+// However, there are some cases where this is fine and should work.
+const unsigned char *null_ptr_1 =
+#embed <media/empty> if_empty(0)
+;
+
+const unsigned char *null_ptr_2 =
+#embed <null_byte.bin>
+;
+
+const unsigned char *null_ptr_3 = {
+#embed <null_byte.bin>
+};
+
+#define FILE_NAME <null_byte.bin>
+#define LIMIT 1
+#define OFFSET 0
+#define EMPTY_SUFFIX suffix()
+
+constexpr unsigned char ch =
+#embed FILE_NAME limit(LIMIT) clang::offset(OFFSET) EMPTY_SUFFIX
+;
+static_assert(ch == 0);
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index f0845985c9efc..9e425ac1c5ce2 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -272,6 +272,9 @@
 // AARCH64-NEXT: #define __SIZE_WIDTH__ 64
 // AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
 // AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
 // AARCH64-NEXT: #define __STDC_HOSTED__ 1
 // AARCH64-NEXT: #define __STDC_UTF_16__ 1
 // AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 6e7c0ea5c730b..12ebaeaedaffa 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1875,6 +1875,9 @@
 // WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
 // WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
 // WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
 // WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
 // WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 0000000000000..2e65efe2a145d
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
\ No newline at end of file
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 38002052227cd..bc4b162880790 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::ObjCSubscriptRefExprClass:
   case Stmt::RecoveryExprClass:
   case Stmt::SYCLUniqueStableNameExprClass:
+  case Stmt::EmbedExprClass:
     K = CXCursor_UnexposedExpr;
     break;
 
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index a94c606c3244a..7fe633aa7e446 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -1213,7 +1213,7 @@ <h2 id="c2x">C23 implementation status</h2>
     <tr>
       <td>#embed</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm">N3017</a></td>
-      <td class="none" align="center">No</td>
+      <td class="unreleased" align="center">Clang 19</td>
     </tr>
 </table>
 </details>

>From eb33e457dbd0dbbd28618c499500d4d1429f6556 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Mon, 17 Jun 2024 07:58:43 -0700
Subject: [PATCH 2/7] Fix memory leak caused by EmbedAnnotationData

EmbedAnnotationData was allocated by a bump allocator, which never runs
destructors, but it had a SmallString inside that can grow onto the heap.
This commit fixes the memory leak by removing the filename fields from
EmbedAnnotationData and EmbedExpr, since they weren't used anyway.
---
 clang/include/clang/AST/Expr.h            | 2 --
 clang/include/clang/Lex/Preprocessor.h    | 2 --
 clang/include/clang/Sema/Sema.h           | 2 +-
 clang/lib/Lex/PPDirectives.cpp            | 7 +++----
 clang/lib/Parse/ParseInit.cpp             | 4 +---
 clang/lib/Sema/SemaExpr.cpp               | 2 --
 clang/lib/Serialization/ASTReaderStmt.cpp | 1 -
 clang/lib/Serialization/ASTWriterStmt.cpp | 1 -
 8 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 352e4467ed9dd..3bc8cae4d8c86 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4801,7 +4801,6 @@ class SourceLocExpr final : public Expr {
 
 /// Stores data related to a single #embed directive.
 struct EmbedDataStorage {
-  StringLiteral *Filename;
   StringLiteral *BinaryData;
   size_t getDataElementCount() const { return BinaryData->getByteLength(); }
 };
@@ -4848,7 +4847,6 @@ class EmbedExpr final : public Expr {
   SourceLocation getBeginLoc() const { return EmbedKeywordLoc; }
   SourceLocation getEndLoc() const { return EmbedKeywordLoc; }
 
-  StringLiteral *getFilenameStringLiteral() const { return Data->Filename; }
   StringLiteral *getDataStringLiteral() const { return Data->BinaryData; }
   EmbedDataStorage *getData() const { return Data; }
 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 60186181c9fcd..be3334b980746 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2743,7 +2743,6 @@ class Preprocessor {
   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
                             const FileEntry *LookupFromFile = nullptr);
   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
-                                StringRef ResolvedFilename,
                                 const LexEmbedParametersResult &Params,
                                 StringRef BinaryContents);
 
@@ -3065,7 +3064,6 @@ class EmptylineHandler {
 /// Helper class to shuttle information about #embed directives from the
 /// preprocessor to the parser through an annotation token.
 struct EmbedAnnotationData {
-  llvm::SmallString<32> FileName;
   StringRef BinaryData;
 };
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 9bf01417186c3..21783e24ba6b5 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5731,7 +5731,7 @@ class Sema final : public SemaBase {
 
   // #embed
   ExprResult ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
-                            StringLiteral *Filename, StringLiteral *BinaryData);
+                            StringLiteral *BinaryData);
 
   // Build a potentially resolved SourceLocExpr.
   ExprResult BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index b7ee0c0edb053..27829b1f7f8ed 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3878,8 +3878,8 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
 }
 
 void Preprocessor::HandleEmbedDirectiveImpl(
-    SourceLocation HashLoc, StringRef ResolvedFilename,
-    const LexEmbedParametersResult &Params, StringRef BinaryContents) {
+    SourceLocation HashLoc, const LexEmbedParametersResult &Params,
+    StringRef BinaryContents) {
   if (BinaryContents.empty()) {
     // If we have no binary contents, the only thing we need to emit are the
     // if_empty tokens, if any.
@@ -3909,7 +3909,6 @@ void Preprocessor::HandleEmbedDirectiveImpl(
   }
 
   EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
-  Data->FileName = ResolvedFilename;
   Data->BinaryData = BinaryContents;
 
   Toks[CurIdx].startToken();
@@ -4014,5 +4013,5 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
   if (Callbacks)
     Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
                               *Params);
-  HandleEmbedDirectiveImpl(HashLoc, Filename, *Params, BinaryContents);
+  HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
 }
diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp
index cd11f905e856a..0a9a359cdaf97 100644
--- a/clang/lib/Parse/ParseInit.cpp
+++ b/clang/lib/Parse/ParseInit.cpp
@@ -449,11 +449,9 @@ ExprResult Parser::createEmbedExpr() {
                                    false, ArrayTy, StartLoc);
     };
 
-    StringLiteral *FileNameArg =
-        CreateStringLiteralFromStringRef(Data->FileName, Context.CharTy);
     StringLiteral *BinaryDataArg = CreateStringLiteralFromStringRef(
         Data->BinaryData, Context.UnsignedCharTy);
-    Res = Actions.ActOnEmbedExpr(StartLoc, FileNameArg, BinaryDataArg);
+    Res = Actions.ActOnEmbedExpr(StartLoc, BinaryDataArg);
   }
   return Res;
 }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 21229054c8d18..b0402d145f1f1 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16700,10 +16700,8 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
 }
 
 ExprResult Sema::ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
-                                StringLiteral *Filename,
                                 StringLiteral *BinaryData) {
   EmbedDataStorage *Data = new (Context) EmbedDataStorage;
-  Data->Filename = Filename;
   Data->BinaryData = BinaryData;
   return new (Context)
       EmbedExpr(Context, EmbedKeywordLoc, Data, /*NumOfElements=*/0,
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a0ffe24e1f91e..e23ceffb10bfe 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1327,7 +1327,6 @@ void ASTStmtReader::VisitEmbedExpr(EmbedExpr *E) {
   VisitExpr(E);
   E->EmbedKeywordLoc = readSourceLocation();
   EmbedDataStorage *Data = new (Record.getContext()) EmbedDataStorage;
-  Data->Filename = cast<StringLiteral>(Record.readSubStmt());
   Data->BinaryData = cast<StringLiteral>(Record.readSubStmt());
   E->Data = Data;
   E->Begin = Record.readInt();
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 546af09d41a35..12610c4ffe1c5 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1266,7 +1266,6 @@ void ASTStmtWriter::VisitEmbedExpr(EmbedExpr *E) {
   VisitExpr(E);
   Record.AddSourceLocation(E->getBeginLoc());
   Record.AddSourceLocation(E->getEndLoc());
-  Record.AddStmt(E->getFilenameStringLiteral());
   Record.AddStmt(E->getDataStringLiteral());
   Record.writeUInt32(E->getStartingElementPos());
   Record.writeUInt32(E->getDataElementCount());

>From 7cf9a9a9104444e2208fe375c11fd0e483400634 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Mon, 17 Jun 2024 08:48:44 -0700
Subject: [PATCH 3/7] Fix small static analyzer issue reported by GH95300

---
 clang/lib/Lex/PPDirectives.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 27829b1f7f8ed..94410bc7e078a 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -3762,8 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
 
         bool WaitingForInnerCloseParen = false;
         while (CurTok.isNot(tok::eod) &&
-               (WaitingForInnerCloseParen ||
-                (!WaitingForInnerCloseParen && CurTok.isNot(tok::r_paren)))) {
+               (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
           switch (CurTok.getKind()) {
           default: // Shutting up diagnostics about not fully-covered switch.
             break;

>From 843a3db3f38af0155344226388441909dd3e1c75 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 04:09:37 -0700
Subject: [PATCH 4/7] Avoid null byte file

---
 clang/test/Preprocessor/Inputs/null_byte.bin | Bin 1 -> 0 bytes
 clang/test/Preprocessor/embed_weird.cpp      |   2 ++
 2 files changed, 2 insertions(+)
 delete mode 100644 clang/test/Preprocessor/Inputs/null_byte.bin

diff --git a/clang/test/Preprocessor/Inputs/null_byte.bin b/clang/test/Preprocessor/Inputs/null_byte.bin
deleted file mode 100644
index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1
IcmZPo000310RR91

diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
index a31b0836b0311..5f1869543e4de 100644
--- a/clang/test/Preprocessor/embed_weird.cpp
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -1,5 +1,7 @@
+// RUN: printf "\0" > %S/Inputs/null_byte.bin
 // RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,cxx -Wno-c23-extensions
 // RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,c
+// RUN: rm %S/Inputs/null_byte.bin
 #embed <media/empty>
 ;
 

>From a5081b45178a3111cdc9e8bd5d4fe55d2f1ec908 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 04:25:14 -0700
Subject: [PATCH 5/7] Fix alignment

---
 clang/lib/Basic/IdentifierTable.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 04cc9c7dadf86..4f7ccaf4021d6 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -441,7 +441,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 4, 'e', 's', else);
   CASE( 4, 'l', 'n', line);
   CASE( 4, 's', 'c', sccs);
-  CASE(5, 'e', 'b', embed);
+  CASE( 5, 'e', 'b', embed);
   CASE( 5, 'e', 'd', endif);
   CASE( 5, 'e', 'r', error);
   CASE( 5, 'i', 'e', ident);

>From 8dc702686ad3199f942efa9915454c725acf51ac Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 04:25:33 -0700
Subject: [PATCH 6/7] Use report_fatal_error

---
 clang/lib/AST/ExprConstant.cpp               | 2 +-
 clang/lib/AST/StmtPrinter.cpp                | 2 +-
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ec16a1a72c46d..a06a9a0236c6b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9151,7 +9151,7 @@ class PointerExprEvaluator
   }
 
   bool VisitEmbedExpr(const EmbedExpr *E) {
-    llvm_unreachable("Not yet implemented for ExprConstant.cpp");
+    llvm::report_fatal_error("Not yet implemented for ExprConstant.cpp");
     return true;
   }
 
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 2d223a9c05f0a..5241a5cdbf009 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1178,7 +1178,7 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
 }
 
 void StmtPrinter::VisitEmbedExpr(EmbedExpr *Node) {
-  assert(false && "not yet implemented");
+  llvm::report_fatal_error("Not implemented");
 }
 
 void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index b331be8f56640..c1a8aad83a90b 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2424,7 +2424,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     }
 
     case Stmt::EmbedExprClass:
-      llvm_unreachable("Support for EmbedExpr is not implemented.");
+      llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
       break;
   }
 }

>From 52b504a95609281a53113c499d1c966e107fb35c Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 05:05:07 -0700
Subject: [PATCH 7/7] Add "weird" test cases

---
 clang/test/Preprocessor/embed_weird.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
index 5f1869543e4de..a90d3bc330538 100644
--- a/clang/test/Preprocessor/embed_weird.cpp
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -98,3 +98,19 @@ constexpr unsigned char ch =
 #embed FILE_NAME limit(LIMIT) clang::offset(OFFSET) EMPTY_SUFFIX
 ;
 static_assert(ch == 0);
+
+void foobar(float x, char y, char z); // cxx-note {{candidate function not viable: requires 3 arguments, but 1 was provided}}
+                                      // c-note at -1 {{declared here}}
+void g1() { foobar((float) // cxx-error {{no matching function for call to 'foobar'}}
+#embed "numbers.txt" limit(3) // expected-warning {{left operand of comma operator has no effect}}
+); // c-error {{too few arguments to function call, expected 3, have 1}}
+}
+
+#if __cplusplus
+struct S { S(char x); ~S(); };
+void f1() {
+  S s[] = {
+#embed "null_byte.bin"
+  };
+}
+#endif



More information about the cfe-commits mailing list