[clang] [clang] Add basic support for #embed. (PR #76480)

Junior Rantila via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 27 17:29:58 PST 2023


https://github.com/juniorrantila created https://github.com/llvm/llvm-project/pull/76480

This patch takes the first steps toward C23 #embed support. We can include binary files, but embed-parameter-sequences are not implemented. Adding the embedded file to the -M dependency array is also not implemented.

>From 00e9c665c18c1efafc15e293ea6dc35b1ed32e48 Mon Sep 17 00:00:00 2001
From: Junior Rantila <junior.rantila at gmail.com>
Date: Wed, 27 Dec 2023 22:00:35 +0100
Subject: [PATCH] [clang] Add basic support for #embed.

This patch takes the first steps toward C23 #embed support.
We can include binary files, but embed-parameter-sequences are not
implemented. Adding the embedded file to the -M dependency array is
also not implemented.
---
 .../clang/Basic/DiagnosticParseKinds.td       |   2 +
 clang/include/clang/Basic/TokenKinds.def      |   3 +
 .../clang/Lex/DependencyDirectivesScanner.h   |   1 +
 clang/include/clang/Lex/Preprocessor.h        |   3 +
 clang/lib/Basic/IdentifierTable.cpp           |   1 +
 clang/lib/Lex/DependencyDirectivesScanner.cpp |  12 +
 clang/lib/Lex/Lexer.cpp                       |   1 +
 clang/lib/Lex/PPDirectives.cpp                | 221 +++++++++++++++++-
 8 files changed, 240 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index e4b1069cde1850..56258df192f9ff 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -165,6 +165,8 @@ def ext_c99_feature : Extension<
   "'%0' is a C99 extension">, InGroup<C99>;
 def ext_c11_feature : Extension<
   "'%0' is a C11 extension">, InGroup<C11>;
+def ext_c23_feature : Extension<
+  "'%0' is a C23 extension">, InGroup<C23>;
 def warn_c23_compat_keyword : Warning<
  "'%0' is incompatible with C standards before C23">,
  InGroup<CPre23Compat>, DefaultIgnore;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 3f0e1e1a7d45ad..591684c004f908 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -113,6 +113,9 @@ PPKEYWORD(defined)
 PPKEYWORD(include)
 PPKEYWORD(__include_macros)
 
+// C23 6.10.2 - Binary resource inclusion
+PPKEYWORD(embed)
+
 // C99 6.10.3 - Macro Replacement.
 PPKEYWORD(define)
 PPKEYWORD(undef)
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index 0e115906fbfe51..b00b9391d0074a 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -70,6 +70,7 @@ enum DirectiveKind : uint8_t {
   pp_pragma_include_alias,
   pp_pragma_system_header,
   pp_include_next,
+  pp_embed,
   pp_if,
   pp_ifdef,
   pp_ifndef,
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4ec21a8b6be2c8..4ff097eae78571 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2689,6 +2689,9 @@ class Preprocessor {
   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
   void HandleMicrosoftImportDirective(Token &Tok);
+  void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+                            ConstSearchDirIterator LookupFrom = nullptr,
+                            const FileEntry *LookupFromFile = nullptr);
 
 public:
   /// Check that the given module is available, producing a diagnostic if not.
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 5902c6dc3ce0b4..50cf1925acf49e 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -446,6 +446,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 5, 'i', 'e', ident);
   CASE( 5, 'i', 'd', ifdef);
   CASE( 5, 'u', 'd', undef);
+  CASE( 5, 'e', 'b', embed);
 
   CASE( 6, 'a', 's', assert);
   CASE( 6, 'd', 'f', define);
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 980f865cf24c97..867614cdb27167 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -91,6 +91,9 @@ struct Scanner {
   dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                         const char *const End);
 
+  dependency_directives_scan::Token &lexEmbedFilename(const char *&First,
+                                                      const char *const End);
+
   void skipLine(const char *&First, const char *const End);
   void skipDirective(StringRef Name, const char *&First, const char *const End);
 
@@ -541,6 +544,11 @@ Scanner::lexIncludeFilename(const char *&First, const char *const End) {
   return CurDirToks.back();
 }
 
+dependency_directives_scan::Token &
+Scanner::lexEmbedFilename(const char *&First, const char *const End) {
+  return lexIncludeFilename(First, End); // reuses the #include filename lexer
+}
+
 void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
   while (true) {
     const dependency_directives_scan::Token &Tok = lexToken(First, End);
@@ -875,6 +883,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
   auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                   .Case("include", pp_include)
                   .Case("__include_macros", pp___include_macros)
+                  .Case("embed", pp_embed)
                   .Case("define", pp_define)
                   .Case("undef", pp_undef)
                   .Case("import", pp_import)
@@ -903,6 +912,9 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
   case pp_import:
     lexIncludeFilename(First, End);
     break;
+  case pp_embed:
+    lexEmbedFilename(First, End);
+    break;
   default:
     break;
   }
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 50b56265f6e164..527180daa5f9aa 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -4541,6 +4541,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
       llvm_unreachable("unexpected 'pp_none'");
     case pp_include:
     case pp___include_macros:
+    case pp_embed:
     case pp_define:
     case pp_undef:
     case pp_import:
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9f82a6d073e3ba..31bbc9ea1beeb9 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -12,6 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/DiagnosticLex.h"
+#include "clang/Basic/DiagnosticParse.h"
 #include "clang/Basic/DirectoryEntry.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/IdentifierTable.h"
@@ -20,10 +22,9 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/Utils.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/HeaderSearchOptions.h"
-#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/ModuleLoader.h"
@@ -39,16 +40,15 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/AlignOf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <algorithm>
 #include <cassert>
 #include <cstring>
-#include <new>
 #include <optional>
 #include <string>
 #include <utility>
@@ -1242,6 +1242,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       // Handle -imacros.
       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
 
+    // C23 6.10.2 - Binary resource inclusion
+    case tok::pp_embed:
+      // Handle #embed.
+      return HandleEmbedDirective(SavedHash.getLocation(), Result);
+
     // C99 6.10.3 - Macro Replacement.
     case tok::pp_define:
       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
@@ -2014,6 +2019,214 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
   }
 }
 
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        ConstSearchDirIterator LookupFrom,
+                                        const FileEntry *LookupFromFile) {
+  if (!getLangOpts().C23) {
+    Diag(EmbedTok, diag::ext_c23_feature)
+        << EmbedTok.getIdentifierInfo()->getNameStart();
+  }
+
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // FIXME: Add support for embed parameter sequence.
+  CheckEndOfDirective(EmbedTok.getIdentifierInfo()->getNameStart());
+
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation CharEnd = FilenameTok.getEndLoc();
+
+  CharSourceRange FilenameRange =
+      CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+
+  // If GetIncludeFilenameSpelling left Filename empty, there was an error
+  // and there is nothing to look up.
+  if (Filename.empty())
+    return;
+
+  // Search include directories.
+  bool IsMapped = false;
+  bool IsFrameworkFound = false;
+  ConstSearchDirIterator CurDir = nullptr;
+  SmallString<1024> SearchPath;
+  SmallString<1024> RelativePath;
+  // SearchPath and RelativePath are always handed to the lookup below; this
+  // function does not forward them to any 'Callbacks' afterwards.
+  ModuleMap::KnownHeader SuggestedModule;
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef LookupFilename = Filename;
+
+  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
+  // is unnecessary on Windows since the filesystem there handles backslashes.
+  SmallString<128> NormalizedPath;
+  llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
+  if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
+    NormalizedPath = Filename.str();
+    llvm::sys::path::native(NormalizedPath);
+    LookupFilename = NormalizedPath;
+    BackslashStyle = llvm::sys::path::Style::windows;
+  }
+
+  OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
+      &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
+      IsFrameworkFound, false, IsMapped, LookupFrom, LookupFromFile,
+      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
+  if (!File)
+    return;
+
+  // The #embed file's characteristic is whatever HeaderInfo.getFileDirFlavor
+  // reports for it; the characteristic of the file containing the #embed
+  // directive is not consulted here.
+  SrcMgr::CharacteristicKind FileCharacter = HeaderInfo.getFileDirFlavor(*File);
+
+  // Issue a diagnostic if the name of the file on disk has a different case
+  // than the one we're about to open.
+  const bool CheckIncludePathPortability =
+      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
+
+  if (CheckIncludePathPortability) {
+    StringRef Name = LookupFilename;
+    StringRef NameWithoriginalSlashes = Filename;
+#if defined(_WIN32)
+    // Skip UNC prefix if present. (tryGetRealPathName() always
+    // returns a path with the prefix skipped.)
+    bool NameWasUNC = Name.consume_front("\\\\?\\");
+    NameWithoriginalSlashes.consume_front("\\\\?\\");
+#endif
+    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
+    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
+                                          llvm::sys::path::end(Name));
+#if defined(_WIN32)
+    // -Wnonportable-include-path is designed to diagnose includes using
+    // case even on systems with a case-insensitive file system.
+    // On Windows, RealPathName always starts with an upper-case drive
+    // letter for absolute paths, but Name might start with either
+    // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
+    // ("foo" will always have on-disk case, no matter which case was
+    // used in the cd command). To not emit this warning solely for
+    // the drive letter, whose case is dependent on if `cd` is used
+    // with upper- or lower-case drive letters, always consider the
+    // given drive letter case as correct for the purpose of this warning.
+    SmallString<128> FixedDriveRealPath;
+    if (llvm::sys::path::is_absolute(Name) &&
+        llvm::sys::path::is_absolute(RealPathName) &&
+        toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
+        isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
+      assert(Components.size() >= 3 && "should have drive, backslash, name");
+      assert(Components[0].size() == 2 && "should start with drive");
+      assert(Components[0][1] == ':' && "should have colon");
+      FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
+      RealPathName = FixedDriveRealPath;
+    }
+#endif
+
+    if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
+      SmallString<128> Path;
+      Path.reserve(Name.size() + 2);
+      Path.push_back(isAngled ? '<' : '"');
+
+      const auto IsSep = [BackslashStyle](char c) {
+        return llvm::sys::path::is_separator(c, BackslashStyle);
+      };
+
+      for (auto Component : Components) {
+        // On POSIX, Components will contain a single '/' as first element
+        // exactly if Name is an absolute path.
+        // On Windows, it will contain "C:" followed by '\' for absolute paths.
+        // The drive letter is optional for absolute paths on Windows, but
+        // clang currently cannot process absolute paths in #embed lines that
+        // don't have a drive.
+        // If the first entry in Components is a directory separator,
+        // then the code at the bottom of this loop that keeps the original
+        // directory separator style copies it. If the second entry is
+        // a directory separator (the C:\ case), then that separator already
+        // got copied when the C: was processed and we want to skip that entry.
+        if (!(Component.size() == 1 && IsSep(Component[0])))
+          Path.append(Component);
+        else if (Path.size() != 1)
+          continue;
+
+        // Append the separator(s) the user used, or the close quote
+        if (Path.size() > NameWithoriginalSlashes.size()) {
+          Path.push_back(isAngled ? '>' : '"');
+          continue;
+        }
+        assert(IsSep(NameWithoriginalSlashes[Path.size() - 1]));
+        do
+          Path.push_back(NameWithoriginalSlashes[Path.size() - 1]);
+        while (Path.size() <= NameWithoriginalSlashes.size() &&
+               IsSep(NameWithoriginalSlashes[Path.size() - 1]));
+      }
+
+#if defined(_WIN32)
+      // Restore UNC prefix if it was there.
+      if (NameWasUNC)
+        Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
+#endif
+
+      // For user files and known standard headers, issue a diagnostic.
+      // For other system headers, don't. They can be controlled separately.
+      auto DiagId =
+          (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
+              ? diag::pp_nonportable_path
+              : diag::pp_nonportable_system_path;
+      Diag(FilenameTok, DiagId)
+          << Path << FixItHint::CreateReplacement(FilenameRange, Path);
+    }
+  }
+
+  // Create a File ID for the file; FID is only checked for validity below.
+  SourceLocation EmbedPos = FilenameTok.getLocation();
+  // If the filename string was the result of macro expansions, set the embed
+  // position on the file where it will be embedded and after the expansions.
+  if (EmbedPos.isMacroID())
+    EmbedPos = SourceMgr.getExpansionRange(EmbedPos).getEnd();
+  FileID FID = SourceMgr.createFileID(*File, EmbedPos, FileCharacter);
+  if (!FID.isValid()) {
+    TheModuleLoader.HadFatalFailure = true;
+    assert(TheModuleLoader.HadFatalFailure &&
+           "This should be an early exit only to a fatal error");
+    TheModuleLoader.HadFatalFailure = true;
+    EmbedTok.setKind(tok::eof);
+    CurLexer->cutOffLexing();
+    return;
+  }
+
+  const auto FileBuffer = getFileManager().getBufferForFile(*File);
+  if (!FileBuffer)
+    return;
+  const auto Buffer = FileBuffer.get()->getBuffer();
+  if (Buffer.empty())
+    return;
+
+  std::string ProcessedBuffer = R"(
+    #pragma clang diagnostic push
+    #pragma clang diagnostic ignored "-Wc++11-narrowing"
+  )";
+  for (size_t i = 0; i < Buffer.size(); ++i) {
+    auto c = Buffer[i];
+    ProcessedBuffer += "0x" + llvm::toHex(c) + ",";
+    if ((i + 1) % 16 == 0)
+      ProcessedBuffer += "\n";
+  }
+  ProcessedBuffer += "\n#pragma clang diagnostic pop\n";
+  llvm::MemoryBufferRef Buf(
+      StringRef(ProcessedBuffer).copy(getPreprocessorAllocator()), Filename);
+  EnterSourceFile(SourceMgr.createFileID(Buf), CurDir,
+                  FilenameTok.getLocation());
+}
+
 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
     ConstSearchDirIterator *CurDir, StringRef &Filename,
     SourceLocation FilenameLoc, CharSourceRange FilenameRange,



More information about the cfe-commits mailing list