[clang] [clang] Add basic support for #embed. (PR #76480)
Junior Rantila via cfe-commits
cfe-commits at lists.llvm.org
Wed Dec 27 17:32:54 PST 2023
https://github.com/juniorrantila updated https://github.com/llvm/llvm-project/pull/76480
>From a51561dcfb9877c0881651ecdc9de8928991506c Mon Sep 17 00:00:00 2001
From: Junior Rantila <junior.rantila at gmail.com>
Date: Wed, 27 Dec 2023 22:00:35 +0100
Subject: [PATCH] [clang] Add basic support for #embed.
This patch takes the first steps toward C23 #embed support.
Binary files can be embedded, but embed-parameter-sequences are not
implemented yet. Reporting the embedded file in the dependency output
generated by -M is also not implemented yet.
---
.../clang/Basic/DiagnosticParseKinds.td | 2 +
clang/include/clang/Basic/TokenKinds.def | 3 +
.../clang/Lex/DependencyDirectivesScanner.h | 1 +
clang/include/clang/Lex/Preprocessor.h | 3 +
clang/lib/Basic/IdentifierTable.cpp | 1 +
clang/lib/Lex/DependencyDirectivesScanner.cpp | 12 +
clang/lib/Lex/Lexer.cpp | 1 +
clang/lib/Lex/PPDirectives.cpp | 221 +++++++++++++++++-
8 files changed, 240 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index e4b1069cde1850..56258df192f9ff 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -165,6 +165,8 @@ def ext_c99_feature : Extension<
"'%0' is a C99 extension">, InGroup<C99>;
def ext_c11_feature : Extension<
"'%0' is a C11 extension">, InGroup<C11>;
+def ext_c23_feature : Extension<
+ "'%0' is a C23 extension">, InGroup<C23>;
def warn_c23_compat_keyword : Warning<
"'%0' is incompatible with C standards before C23">,
InGroup<CPre23Compat>, DefaultIgnore;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 3f0e1e1a7d45ad..591684c004f908 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -113,6 +113,9 @@ PPKEYWORD(defined)
PPKEYWORD(include)
PPKEYWORD(__include_macros)
+// C23 6.10.2 - Binary resource inclusion
+PPKEYWORD(embed)
+
// C99 6.10.3 - Macro Replacement.
PPKEYWORD(define)
PPKEYWORD(undef)
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index 0e115906fbfe51..b00b9391d0074a 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -70,6 +70,7 @@ enum DirectiveKind : uint8_t {
pp_pragma_include_alias,
pp_pragma_system_header,
pp_include_next,
+ pp_embed,
pp_if,
pp_ifdef,
pp_ifndef,
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4ec21a8b6be2c8..4ff097eae78571 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2689,6 +2689,9 @@ class Preprocessor {
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);
+ void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+ ConstSearchDirIterator LookupFrom = nullptr,
+ const FileEntry *LookupFromFile = nullptr);
public:
/// Check that the given module is available, producing a diagnostic if not.
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 5902c6dc3ce0b4..459e03f337fa87 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -446,6 +446,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
CASE( 5, 'i', 'e', ident);
CASE( 5, 'i', 'd', ifdef);
CASE( 5, 'u', 'd', undef);
+ CASE(5, 'e', 'b', embed);
CASE( 6, 'a', 's', assert);
CASE( 6, 'd', 'f', define);
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 980f865cf24c97..867614cdb27167 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -91,6 +91,9 @@ struct Scanner {
dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
const char *const End);
+ dependency_directives_scan::Token &lexEmbedFilename(const char *&First,
+ const char *const End);
+
void skipLine(const char *&First, const char *const End);
void skipDirective(StringRef Name, const char *&First, const char *const End);
@@ -541,6 +544,11 @@ Scanner::lexIncludeFilename(const char *&First, const char *const End) {
return CurDirToks.back();
}
+dependency_directives_scan::Token &
+Scanner::lexEmbedFilename(const char *&First, const char *const End) {
+ return lexIncludeFilename(First, End); // Reuses the #include lexer.
+}
+
void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
while (true) {
const dependency_directives_scan::Token &Tok = lexToken(First, End);
@@ -875,6 +883,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
.Case("include", pp_include)
.Case("__include_macros", pp___include_macros)
+ .Case("embed", pp_embed)
.Case("define", pp_define)
.Case("undef", pp_undef)
.Case("import", pp_import)
@@ -903,6 +912,9 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
case pp_import:
lexIncludeFilename(First, End);
break;
+ case pp_embed:
+ lexEmbedFilename(First, End);
+ break;
default:
break;
}
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 50b56265f6e164..527180daa5f9aa 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -4541,6 +4541,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
llvm_unreachable("unexpected 'pp_none'");
case pp_include:
case pp___include_macros:
+ case pp_embed:
case pp_define:
case pp_undef:
case pp_import:
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9f82a6d073e3ba..31bbc9ea1beeb9 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/DiagnosticLex.h"
+#include "clang/Basic/DiagnosticParse.h"
#include "clang/Basic/DirectoryEntry.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
@@ -20,10 +22,9 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/Utils.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/HeaderSearchOptions.h"
-#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
@@ -39,16 +40,15 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
#include <cassert>
#include <cstring>
-#include <new>
#include <optional>
#include <string>
#include <utility>
@@ -1242,6 +1242,11 @@ void Preprocessor::HandleDirective(Token &Result) {
// Handle -imacros.
return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
+ // C23 6.10.2 - Binary resource inclusion
+ case tok::pp_embed:
+ // Handle #embed.
+ return HandleEmbedDirective(SavedHash.getLocation(), Result);
+
// C99 6.10.3 - Macro Replacement.
case tok::pp_define:
return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
@@ -2014,6 +2019,214 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
}
+/// Handle a C23 '#embed' directive. The named resource is looked up like a
+/// header (LookupFrom / LookupFromFile are forwarded to that lookup) and its
+/// bytes are entered as a comma-separated list of hex literals, without a
+/// trailing comma. HashLoc is unused; embed parameters are not handled yet.
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        ConstSearchDirIterator LookupFrom,
+                                        const FileEntry *LookupFromFile) {
+  if (!getLangOpts().C23)
+    Diag(EmbedTok, diag::ext_c23_feature)
+        << EmbedTok.getIdentifierInfo()->getNameStart();
+
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // FIXME: Add support for embed parameter sequence.
+  CheckEndOfDirective(EmbedTok.getIdentifierInfo()->getNameStart());
+
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation CharEnd = FilenameTok.getEndLoc();
+
+  CharSourceRange FilenameRange =
+      CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+
+  // An empty Filename means GetIncludeFilenameSpelling reported an error.
+  if (Filename.empty())
+    return;
+
+  // Search include directories.
+  bool IsMapped = false;
+  bool IsFrameworkFound = false;
+  ConstSearchDirIterator CurDir = nullptr;
+  SmallString<1024> SearchPath;
+  SmallString<1024> RelativePath;
+  // SuggestedModule is only an out-parameter for the lookup below; nothing
+  // reads it afterwards.
+  ModuleMap::KnownHeader SuggestedModule;
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef LookupFilename = Filename;
+
+  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
+  // is unnecessary on Windows since the filesystem there handles backslashes.
+  SmallString<128> NormalizedPath;
+  llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
+  if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
+    NormalizedPath = Filename.str();
+    llvm::sys::path::native(NormalizedPath);
+    LookupFilename = NormalizedPath;
+    BackslashStyle = llvm::sys::path::Style::windows;
+  }
+
+  OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
+      &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
+      IsFrameworkFound, false, IsMapped, LookupFrom, LookupFromFile,
+      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
+  if (!File)
+    return;
+
+  // Classify the embedded file via HeaderInfo.getFileDirFlavor. Only the
+  // embedded file itself is consulted here; the characteristic of the file
+  // containing the #embed directive does not influence the result.
+  SrcMgr::CharacteristicKind FileCharacter = HeaderInfo.getFileDirFlavor(*File);
+
+  // Issue a diagnostic if the name of the file on disk has a different case
+  // than the one we're about to open.
+  const bool CheckIncludePathPortability =
+      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
+
+  if (CheckIncludePathPortability) {
+    StringRef Name = LookupFilename;
+    StringRef NameWithoriginalSlashes = Filename;
+#if defined(_WIN32)
+    // Skip UNC prefix if present. (tryGetRealPathName() always
+    // returns a path with the prefix skipped.)
+    bool NameWasUNC = Name.consume_front("\\\\?\\");
+    NameWithoriginalSlashes.consume_front("\\\\?\\");
+#endif
+    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
+    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
+                                          llvm::sys::path::end(Name));
+#if defined(_WIN32)
+    // -Wnonportable-include-path is designed to diagnose includes using
+    // case even on systems with a case-insensitive file system.
+    // On Windows, RealPathName always starts with an upper-case drive
+    // letter for absolute paths, but Name might start with either
+    // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
+    // ("foo" will always have on-disk case, no matter which case was
+    // used in the cd command). To not emit this warning solely for
+    // the drive letter, whose case is dependent on if `cd` is used
+    // with upper- or lower-case drive letters, always consider the
+    // given drive letter case as correct for the purpose of this warning.
+    SmallString<128> FixedDriveRealPath;
+    if (llvm::sys::path::is_absolute(Name) &&
+        llvm::sys::path::is_absolute(RealPathName) &&
+        toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
+        isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
+      assert(Components.size() >= 3 && "should have drive, backslash, name");
+      assert(Components[0].size() == 2 && "should start with drive");
+      assert(Components[0][1] == ':' && "should have colon");
+      FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
+      RealPathName = FixedDriveRealPath;
+    }
+#endif
+
+    if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
+      SmallString<128> Path;
+      Path.reserve(Name.size() + 2);
+      Path.push_back(isAngled ? '<' : '"');
+
+      const auto IsSep = [BackslashStyle](char c) {
+        return llvm::sys::path::is_separator(c, BackslashStyle);
+      };
+
+      for (auto Component : Components) {
+        // On POSIX, Components will contain a single '/' as first element
+        // exactly if Name is an absolute path.
+        // On Windows, it will contain "C:" followed by '\' for absolute paths.
+        // The drive letter is optional for absolute paths on Windows, but
+        // clang currently cannot process absolute paths in #embed lines that
+        // don't have a drive.
+        // If the first entry in Components is a directory separator,
+        // then the code at the bottom of this loop that keeps the original
+        // directory separator style copies it. If the second entry is
+        // a directory separator (the C:\ case), then that separator already
+        // got copied when the C: was processed and we want to skip that entry.
+        if (!(Component.size() == 1 && IsSep(Component[0])))
+          Path.append(Component);
+        else if (Path.size() != 1)
+          continue;
+
+        // Append the separator(s) the user used, or the close quote
+        if (Path.size() > NameWithoriginalSlashes.size()) {
+          Path.push_back(isAngled ? '>' : '"');
+          continue;
+        }
+        assert(IsSep(NameWithoriginalSlashes[Path.size() - 1]));
+        do
+          Path.push_back(NameWithoriginalSlashes[Path.size() - 1]);
+        while (Path.size() <= NameWithoriginalSlashes.size() &&
+               IsSep(NameWithoriginalSlashes[Path.size() - 1]));
+      }
+
+#if defined(_WIN32)
+      // Restore UNC prefix if it was there.
+      if (NameWasUNC)
+        Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
+#endif
+
+      // For user files and known standard headers, issue a diagnostic.
+      // For other system headers, don't. They can be controlled separately.
+      auto DiagId =
+          (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
+              ? diag::pp_nonportable_path
+              : diag::pp_nonportable_system_path;
+      Diag(FilenameTok, DiagId)
+          << Path << FixItHint::CreateReplacement(FilenameRange, Path);
+    }
+  }
+
+  // Look up the file, create a File ID for it.
+  SourceLocation EmbedPos = FilenameTok.getLocation();
+  // If the filename string was the result of macro expansions, set the embed
+  // position on the file where it will be embedded and after the expansions.
+  if (EmbedPos.isMacroID())
+    EmbedPos = SourceMgr.getExpansionRange(EmbedPos).getEnd();
+  FileID FID = SourceMgr.createFileID(*File, EmbedPos, FileCharacter);
+  if (!FID.isValid()) {
+    // Fatal: flag the failure once and cut off lexing of the current file.
+    TheModuleLoader.HadFatalFailure = true;
+    EmbedTok.setKind(tok::eof);
+    CurLexer->cutOffLexing();
+    return;
+  }
+
+  const auto FileBuffer = getFileManager().getBufferForFile(*File);
+  if (!FileBuffer)
+    return;
+  const auto Buffer = FileBuffer.get()->getBuffer();
+  if (Buffer.empty())
+    return;
+
+  std::string ProcessedBuffer = R"(
+    #pragma clang diagnostic push
+    #pragma clang diagnostic ignored "-Wc++11-narrowing"
+  )";
+  for (size_t i = 0; i < Buffer.size(); ++i) {
+    // Commas only go between elements; rows still break after 16 values.
+    if (i != 0)
+      ProcessedBuffer += (i % 16 == 0) ? ",\n" : ",";
+    ProcessedBuffer += "0x" + llvm::toHex(Buffer[i]);
+  }
+  ProcessedBuffer += "\n#pragma clang diagnostic pop\n";
+  llvm::MemoryBufferRef Buf(
+      StringRef(ProcessedBuffer).copy(getPreprocessorAllocator()), Filename);
+  EnterSourceFile(SourceMgr.createFileID(Buf), CurDir,
+                  FilenameTok.getLocation());
+}
+
OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
ConstSearchDirIterator *CurDir, StringRef &Filename,
SourceLocation FilenameLoc, CharSourceRange FilenameRange,
More information about the cfe-commits
mailing list