[clang] Add -fkeep-system-includes modifier for -E (PR #67684)

Paul T Robinson via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 29 13:35:59 PDT 2023


https://github.com/pogo59 updated https://github.com/llvm/llvm-project/pull/67684

>From 2a0dd575dbbc6666f80d1cd264d6dbc8de8fa897 Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul.robinson at sony.com>
Date: Wed, 27 Sep 2023 15:30:52 -0700
Subject: [PATCH 1/4] [NFC] Change a reference member to pointer

This will allow the raw_ostream to be redirected in a subsequent commit.
---
 .../lib/Frontend/PrintPreprocessedOutput.cpp  | 208 +++++++++---------
 1 file changed, 104 insertions(+), 104 deletions(-)

diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 1b262d9e6f7cb3b..f86ba08d36223be 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -32,42 +32,42 @@ using namespace clang;
 /// PrintMacroDefinition - Print a macro definition in a form that will be
 /// properly accepted back as a definition.
 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
-                                 Preprocessor &PP, raw_ostream &OS) {
-  OS << "#define " << II.getName();
+                                 Preprocessor &PP, raw_ostream *OS) {
+  *OS << "#define " << II.getName();
 
   if (MI.isFunctionLike()) {
-    OS << '(';
+    *OS << '(';
     if (!MI.param_empty()) {
       MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
       for (; AI+1 != E; ++AI) {
-        OS << (*AI)->getName();
-        OS << ',';
+        *OS << (*AI)->getName();
+        *OS << ',';
       }
 
       // Last argument.
       if ((*AI)->getName() == "__VA_ARGS__")
-        OS << "...";
+        *OS << "...";
       else
-        OS << (*AI)->getName();
+        *OS << (*AI)->getName();
     }
 
     if (MI.isGNUVarargs())
-      OS << "...";  // #define foo(x...)
+      *OS << "...";  // #define foo(x...)
 
-    OS << ')';
+    *OS << ')';
   }
 
   // GCC always emits a space, even if the macro body is empty.  However, do not
   // want to emit two spaces if the first token has a leading space.
   if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
-    OS << ' ';
+    *OS << ' ';
 
   SmallString<128> SpellingBuffer;
   for (const auto &T : MI.tokens()) {
     if (T.hasLeadingSpace())
-      OS << ' ';
+      *OS << ' ';
 
-    OS << PP.getSpelling(T, SpellingBuffer);
+    *OS << PP.getSpelling(T, SpellingBuffer);
   }
 }
 
@@ -81,7 +81,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   SourceManager &SM;
   TokenConcatenation ConcatInfo;
 public:
-  raw_ostream &OS;
+  raw_ostream *OS;
 private:
   unsigned CurLine;
 
@@ -102,7 +102,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   Token PrevPrevTok;
 
 public:
-  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
+  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
                            bool defines, bool DumpIncludeDirectives,
                            bool UseLineDirectives, bool MinimizeWhitespace,
                            bool DirectivesOnly)
@@ -235,23 +235,23 @@ void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
 
   // Emit #line directives or GNU line markers depending on what mode we're in.
   if (UseLineDirectives) {
-    OS << "#line" << ' ' << LineNo << ' ' << '"';
-    OS.write_escaped(CurFilename);
-    OS << '"';
+    *OS << "#line" << ' ' << LineNo << ' ' << '"';
+    OS->write_escaped(CurFilename);
+    *OS << '"';
   } else {
-    OS << '#' << ' ' << LineNo << ' ' << '"';
-    OS.write_escaped(CurFilename);
-    OS << '"';
+    *OS << '#' << ' ' << LineNo << ' ' << '"';
+    OS->write_escaped(CurFilename);
+    *OS << '"';
 
     if (ExtraLen)
-      OS.write(Extra, ExtraLen);
+      OS->write(Extra, ExtraLen);
 
     if (FileType == SrcMgr::C_System)
-      OS.write(" 3", 2);
+      OS->write(" 3", 2);
     else if (FileType == SrcMgr::C_ExternCSystem)
-      OS.write(" 3 4", 4);
+      OS->write(" 3 4", 4);
   }
-  OS << '\n';
+  *OS << '\n';
 }
 
 /// MoveToLine - Move the output to the source line specified by the location
@@ -266,7 +266,7 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
   bool StartedNewLine = false;
   if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
       EmittedDirectiveOnThisLine) {
-    OS << '\n';
+    *OS << '\n';
     StartedNewLine = true;
     CurLine += 1;
     EmittedTokensOnThisLine = false;
@@ -283,12 +283,12 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
     // Printing a single line has priority over printing a #line directive, even
     // when minimizing whitespace which otherwise would print #line directives
     // for every single line.
-    OS << '\n';
+    *OS << '\n';
     StartedNewLine = true;
   } else if (!DisableLineMarkers) {
     if (LineNo - CurLine <= 8) {
       const char *NewLines = "\n\n\n\n\n\n\n\n";
-      OS.write(NewLines, LineNo - CurLine);
+      OS->write(NewLines, LineNo - CurLine);
     } else {
       // Emit a #line or line marker.
       WriteLineInfo(LineNo, nullptr, 0);
@@ -297,7 +297,7 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
   } else if (EmittedTokensOnThisLine) {
     // If we are not on the correct line and don't need to be line-correct,
     // at least ensure we start on a new line.
-    OS << '\n';
+    *OS << '\n';
     StartedNewLine = true;
   }
 
@@ -312,7 +312,7 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
 
 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
   if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
-    OS << '\n';
+    *OS << '\n';
     EmittedTokensOnThisLine = false;
     EmittedDirectiveOnThisLine = false;
   }
@@ -399,9 +399,9 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
     MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
     const std::string TokenText = PP.getSpelling(IncludeTok);
     assert(!TokenText.empty());
-    OS << "#" << TokenText << " "
-       << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
-       << " /* clang -E -dI */";
+    *OS << "#" << TokenText << " "
+        << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
+        << " /* clang -E -dI */";
     setEmittedDirectiveOnThisLine();
   }
 
@@ -412,11 +412,11 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
     case tok::pp_import:
     case tok::pp_include_next:
       MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
-      OS << "#pragma clang module import " << Imported->getFullModuleName(true)
-         << " /* clang -E: implicit import for "
-         << "#" << PP.getSpelling(IncludeTok) << " "
-         << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
-         << " */";
+      *OS << "#pragma clang module import " << Imported->getFullModuleName(true)
+          << " /* clang -E: implicit import for "
+          << "#" << PP.getSpelling(IncludeTok) << " "
+          << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
+          << " */";
       setEmittedDirectiveOnThisLine();
       break;
 
@@ -438,14 +438,14 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
 /// Handle entering the scope of a module during a module compilation.
 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
   startNewLineIfNeeded();
-  OS << "#pragma clang module begin " << M->getFullModuleName(true);
+  *OS << "#pragma clang module begin " << M->getFullModuleName(true);
   setEmittedDirectiveOnThisLine();
 }
 
 /// Handle leaving the scope of a module during a module compilation.
 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
   startNewLineIfNeeded();
-  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
+  *OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
   setEmittedDirectiveOnThisLine();
 }
 
@@ -454,8 +454,8 @@ void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
 
-  OS.write("#ident ", strlen("#ident "));
-  OS.write(S.begin(), S.size());
+  OS->write("#ident ", strlen("#ident "));
+  OS->write(S.begin(), S.size());
   setEmittedTokensOnThisLine();
 }
 
@@ -491,19 +491,19 @@ void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
     return;
 
   MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
-  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
+  *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
   setEmittedDirectiveOnThisLine();
 }
 
-static void outputPrintable(raw_ostream &OS, StringRef Str) {
+static void outputPrintable(raw_ostream *OS, StringRef Str) {
   for (unsigned char Char : Str) {
     if (isPrintable(Char) && Char != '\\' && Char != '"')
-      OS << (char)Char;
+      *OS << (char)Char;
     else // Output anything hard as an octal escape.
-      OS << '\\'
-         << (char)('0' + ((Char >> 6) & 7))
-         << (char)('0' + ((Char >> 3) & 7))
-         << (char)('0' + ((Char >> 0) & 7));
+      *OS << '\\'
+          << (char)('0' + ((Char >> 6) & 7))
+          << (char)('0' + ((Char >> 3) & 7))
+          << (char)('0' + ((Char >> 0) & 7));
   }
 }
 
@@ -512,25 +512,25 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
                                              PragmaMessageKind Kind,
                                              StringRef Str) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma ";
+  *OS << "#pragma ";
   if (!Namespace.empty())
-    OS << Namespace << ' ';
+    *OS << Namespace << ' ';
   switch (Kind) {
     case PMK_Message:
-      OS << "message(\"";
+      *OS << "message(\"";
       break;
     case PMK_Warning:
-      OS << "warning \"";
+      *OS << "warning \"";
       break;
     case PMK_Error:
-      OS << "error \"";
+      *OS << "error \"";
       break;
   }
 
   outputPrintable(OS, Str);
-  OS << '"';
+  *OS << '"';
   if (Kind == PMK_Message)
-    OS << ')';
+    *OS << ')';
   setEmittedDirectiveOnThisLine();
 }
 
@@ -538,8 +538,8 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
                                            StringRef DebugType) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
 
-  OS << "#pragma clang __debug ";
-  OS << DebugType;
+  *OS << "#pragma clang __debug ";
+  *OS << DebugType;
 
   setEmittedDirectiveOnThisLine();
 }
@@ -547,14 +547,14 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
 void PrintPPOutputPPCallbacks::
 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma " << Namespace << " diagnostic push";
+  *OS << "#pragma " << Namespace << " diagnostic push";
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::
 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma " << Namespace << " diagnostic pop";
+  *OS << "#pragma " << Namespace << " diagnostic pop";
   setEmittedDirectiveOnThisLine();
 }
 
@@ -563,25 +563,25 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
                                                 diag::Severity Map,
                                                 StringRef Str) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma " << Namespace << " diagnostic ";
+  *OS << "#pragma " << Namespace << " diagnostic ";
   switch (Map) {
   case diag::Severity::Remark:
-    OS << "remark";
+    *OS << "remark";
     break;
   case diag::Severity::Warning:
-    OS << "warning";
+    *OS << "warning";
     break;
   case diag::Severity::Error:
-    OS << "error";
+    *OS << "error";
     break;
   case diag::Severity::Ignored:
-    OS << "ignored";
+    *OS << "ignored";
     break;
   case diag::Severity::Fatal:
-    OS << "fatal";
+    *OS << "fatal";
     break;
   }
-  OS << " \"" << Str << '"';
+  *OS << " \"" << Str << '"';
   setEmittedDirectiveOnThisLine();
 }
 
@@ -590,69 +590,69 @@ void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
                                              ArrayRef<int> Ids) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
 
-  OS << "#pragma warning(";
+  *OS << "#pragma warning(";
   switch(WarningSpec) {
-    case PWS_Default:  OS << "default"; break;
-    case PWS_Disable:  OS << "disable"; break;
-    case PWS_Error:    OS << "error"; break;
-    case PWS_Once:     OS << "once"; break;
-    case PWS_Suppress: OS << "suppress"; break;
-    case PWS_Level1:   OS << '1'; break;
-    case PWS_Level2:   OS << '2'; break;
-    case PWS_Level3:   OS << '3'; break;
-    case PWS_Level4:   OS << '4'; break;
+    case PWS_Default:  *OS << "default"; break;
+    case PWS_Disable:  *OS << "disable"; break;
+    case PWS_Error:    *OS << "error"; break;
+    case PWS_Once:     *OS << "once"; break;
+    case PWS_Suppress: *OS << "suppress"; break;
+    case PWS_Level1:   *OS << '1'; break;
+    case PWS_Level2:   *OS << '2'; break;
+    case PWS_Level3:   *OS << '3'; break;
+    case PWS_Level4:   *OS << '4'; break;
   }
-  OS << ':';
+  *OS << ':';
 
   for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
-    OS << ' ' << *I;
-  OS << ')';
+    *OS << ' ' << *I;
+  *OS << ')';
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
                                                  int Level) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma warning(push";
+  *OS << "#pragma warning(push";
   if (Level >= 0)
-    OS << ", " << Level;
-  OS << ')';
+    *OS << ", " << Level;
+  *OS << ')';
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma warning(pop)";
+  *OS << "#pragma warning(pop)";
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
                                                      StringRef Str) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma character_execution_set(push";
+  *OS << "#pragma character_execution_set(push";
   if (!Str.empty())
-    OS << ", " << Str;
-  OS << ')';
+    *OS << ", " << Str;
+  *OS << ')';
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma character_execution_set(pop)";
+  *OS << "#pragma character_execution_set(pop)";
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::
 PragmaAssumeNonNullBegin(SourceLocation Loc) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma clang assume_nonnull begin";
+  *OS << "#pragma clang assume_nonnull begin";
   setEmittedDirectiveOnThisLine();
 }
 
 void PrintPPOutputPPCallbacks::
 PragmaAssumeNonNullEnd(SourceLocation Loc) {
   MoveToLine(Loc, /*RequireStartOfLine=*/true);
-  OS << "#pragma clang assume_nonnull end";
+  *OS << "#pragma clang assume_nonnull end";
   setEmittedDirectiveOnThisLine();
 }
 
@@ -673,7 +673,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
     if (MinimizeWhitespace) {
       // Avoid interpreting hash as a directive under -fpreprocessed.
       if (Tok.is(tok::hash))
-        OS << ' ';
+        *OS << ' ';
     } else {
       // Print out space characters so that the first token on a line is
       // indented for easy reading.
@@ -693,11 +693,11 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
       // is not handled as a #define next time through the preprocessor if in
       // -fpreprocessed mode.
       if (ColNo <= 1 && Tok.is(tok::hash))
-        OS << ' ';
+        *OS << ' ';
 
       // Otherwise, indent the appropriate number of spaces.
       for (; ColNo > 1; --ColNo)
-        OS << ' ';
+        *OS << ' ';
     }
   } else {
     // Insert whitespace between the previous and next token if either
@@ -709,7 +709,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
     if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
         ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
          AvoidConcat(PrevPrevTok, PrevTok, Tok)))
-      OS << ' ';
+      *OS << ' ';
   }
 
   PrevPrevTok = PrevTok;
@@ -758,7 +758,7 @@ struct UnknownPragmaHandler : public PragmaHandler {
     // Figure out what line we went to and insert the appropriate number of
     // newline characters.
     Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
-    Callbacks->OS.write(Prefix, strlen(Prefix));
+    Callbacks->OS->write(Prefix, strlen(Prefix));
     Callbacks->setEmittedTokensOnThisLine();
 
     if (ShouldExpandTokens) {
@@ -779,7 +779,7 @@ struct UnknownPragmaHandler : public PragmaHandler {
                                            /*RequireSameLine=*/true);
       IsFirst = false;
       std::string TokSpell = PP.getSpelling(PragmaTok);
-      Callbacks->OS.write(&TokSpell[0], TokSpell.size());
+      Callbacks->OS->write(&TokSpell[0], TokSpell.size());
       Callbacks->setEmittedTokensOnThisLine();
 
       if (ShouldExpandTokens)
@@ -795,7 +795,7 @@ struct UnknownPragmaHandler : public PragmaHandler {
 
 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
                                     PrintPPOutputPPCallbacks *Callbacks,
-                                    raw_ostream &OS) {
+                                    raw_ostream *OS) {
   bool DropComments = PP.getLangOpts().TraditionalCPP &&
                       !PP.getCommentRetentionState();
 
@@ -863,7 +863,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       // components. We don't have a good way to round-trip those.
       Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
       std::string Name = M->getFullModuleName();
-      OS.write(Name.data(), Name.size());
+      OS->write(Name.data(), Name.size());
       Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
     } else if (Tok.isAnnotation()) {
       // Ignore annotation tokens created by pragmas - the pragmas themselves
@@ -871,14 +871,14 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       PP.Lex(Tok);
       continue;
     } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
-      OS << II->getName();
+      *OS << II->getName();
     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
                Tok.getLiteralData()) {
-      OS.write(Tok.getLiteralData(), Tok.getLength());
+      OS->write(Tok.getLiteralData(), Tok.getLength());
     } else if (Tok.getLength() < std::size(Buffer)) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
-      OS.write(TokPtr, Len);
+      OS->write(TokPtr, Len);
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
@@ -895,7 +895,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       }
     } else {
       std::string S = PP.getSpelling(Tok);
-      OS.write(S.data(), S.size());
+      OS->write(S.data(), S.size());
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
@@ -947,7 +947,7 @@ static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
     // Ignore computed macros like __LINE__ and friends.
     if (MI.isBuiltinMacro()) continue;
 
-    PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
+    PrintMacroDefinition(*MacrosByID[i].first, MI, PP, OS);
     *OS << '\n';
   }
 }
@@ -968,7 +968,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
   PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
-      PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
+      PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
       Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
       Opts.MinimizeWhitespace, Opts.DirectivesOnly);
 
@@ -1028,7 +1028,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
   } while (true);
 
   // Read all the preprocessed tokens, printing them out to the stream.
-  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
+  PrintPreprocessedTokens(PP, Tok, Callbacks, OS);
   *OS << '\n';
 
   // Remove the handlers we just added to leave the preprocessor in a sane state

>From a71131f734c90aeaff9c3fb0cdc57c0f81dd8971 Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul.robinson at sony.com>
Date: Thu, 28 Sep 2023 07:18:11 -0700
Subject: [PATCH 2/4] Add -fkeep-system-includes modifier for -E

This option will cause -E to preserve the #include directives
for system headers, rather than expanding them into the output.
This can greatly reduce the volume of preprocessed source text
in a test case, making test case reduction simpler.

Note that -fkeep-system-includes is not always appropriate. For
example, if the problem you want to reproduce is induced by a
system header file, it's better to expand those headers fully.
If your source defines symbols that influence the content of a
system header (e.g., _POSIX_SOURCE) then -E will eliminate the
definition from its output, so the preserved #include may later be
processed without it, potentially changing the meaning of the
preprocessed source. If you use -isystem to point to non-system
headers, for example to suppress warnings in third-party software,
those headers will not be expanded either, which might make the
preprocessed source less useful as a test case.
---
 .../clang/Basic/DiagnosticDriverKinds.td      |  4 +-
 clang/include/clang/Driver/Options.td         |  7 +++
 .../Frontend/PreprocessorOutputOptions.h      |  2 +
 clang/lib/Driver/ToolChains/Clang.cpp         | 18 +++++++-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  | 43 ++++++++++++-------
 clang/test/Frontend/Inputs/dashE/dashE.h      |  3 ++
 clang/test/Frontend/Inputs/dashE/sys/a.h      |  3 ++
 clang/test/Frontend/Inputs/dashE/sys/b.h      |  1 +
 clang/test/Frontend/dashE-sysincludes.cpp     | 23 ++++++++++
 .../minimize-whitespace-messages.c            | 15 ++++---
 10 files changed, 94 insertions(+), 25 deletions(-)
 create mode 100644 clang/test/Frontend/Inputs/dashE/dashE.h
 create mode 100644 clang/test/Frontend/Inputs/dashE/sys/a.h
 create mode 100644 clang/test/Frontend/Inputs/dashE/sys/b.h
 create mode 100644 clang/test/Frontend/dashE-sysincludes.cpp

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2a48c063e243ee0..446a610f56f0379 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -172,8 +172,8 @@ def err_drv_invalid_Xopenmp_target_with_args : Error<
   "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">;
 def err_drv_argument_only_allowed_with : Error<
   "invalid argument '%0' only allowed with '%1'">;
-def err_drv_minws_unsupported_input_type : Error<
-  "'-fminimize-whitespace' invalid for input of type %0">;
+def err_drv_opt_unsupported_input_type : Error<
+  "'%0' invalid for input of type %1">;
 def err_drv_amdgpu_ieee_without_no_honor_nans : Error<
   "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">;
 def err_drv_argument_not_allowed_with : Error<
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f8143651cd3c151..4a64aee64d3f06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2483,6 +2483,13 @@ defm minimize_whitespace : BoolFOption<"minimize-whitespace",
           "whitespace such that two files with only formatting changes are "
           "equal.\n\nOnly valid with -E on C-like inputs and incompatible "
           "with -traditional-cpp.">, NegFlag<SetFalse>>;
+defm keep_system_includes : BoolFOption<"keep-system-includes",
+  PreprocessorOutputOpts<"KeepSystemIncludes">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+          "Instead of expanding system headers when emitting preprocessor "
+          "output, preserve the #include directive. Useful when producing "
+          "preprocessed output for test case reduction.\n\nOnly valid with -E.">,
+  NegFlag<SetFalse>>;
 
 def ffreestanding : Flag<["-"], "ffreestanding">, Group<f_Group>,
   Visibility<[ClangOption, CC1Option]>,
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index d542032431b14c8..db2ec9f2ae20698 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -26,6 +26,7 @@ class PreprocessorOutputOptions {
   unsigned RewriteImports  : 1;    ///< Include contents of transitively-imported modules.
   unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
   unsigned DirectivesOnly : 1; ///< Process directives but do not expand macros.
+  unsigned KeepSystemIncludes : 1; ///< Do not expand system headers.
 
 public:
   PreprocessorOutputOptions() {
@@ -40,6 +41,7 @@ class PreprocessorOutputOptions {
     RewriteImports = 0;
     MinimizeWhitespace = 0;
     DirectivesOnly = 0;
+    KeepSystemIncludes = 0;
   }
 };
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index dda6aef641904aa..1101add97b65e85 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -68,7 +68,9 @@ using namespace llvm::opt;
 static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
   if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC,
                                options::OPT_fminimize_whitespace,
-                               options::OPT_fno_minimize_whitespace)) {
+                               options::OPT_fno_minimize_whitespace,
+                               options::OPT_fkeep_system_includes,
+                               options::OPT_fno_keep_system_includes)) {
     if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
         !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
       D.Diag(clang::diag::err_drv_argument_only_allowed_with)
@@ -6713,11 +6715,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_fno_minimize_whitespace, false)) {
     types::ID InputType = Inputs[0].getType();
     if (!isDerivedFromC(InputType))
-      D.Diag(diag::err_drv_minws_unsupported_input_type)
+      D.Diag(diag::err_drv_opt_unsupported_input_type)
+          << "-fminimize-whitespace"
           << types::getTypeName(InputType);
     CmdArgs.push_back("-fminimize-whitespace");
   }
 
+  // -fno-keep-system-includes is default.
+  if (Args.hasFlag(options::OPT_fkeep_system_includes,
+                   options::OPT_fno_keep_system_includes, false)) {
+    types::ID InputType = Inputs[0].getType();
+    if (!isDerivedFromC(InputType))
+      D.Diag(diag::err_drv_opt_unsupported_input_type)
+      << "-fkeep-system-includes"
+      << types::getTypeName(InputType);
+    CmdArgs.push_back("-fkeep-system-includes");
+  }
+
   // -fms-extensions=0 is default.
   if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
                    IsWindowsMSVC))
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index f86ba08d36223be..4056649e85c28a9 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -97,6 +97,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   bool IsFirstFileEntered;
   bool MinimizeWhitespace;
   bool DirectivesOnly;
+  bool KeepSystemIncludes;
+  raw_ostream *OrigOS;
+  std::unique_ptr<llvm::raw_null_ostream> NullOS;
 
   Token PrevTok;
   Token PrevPrevTok;
@@ -105,12 +108,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
                            bool defines, bool DumpIncludeDirectives,
                            bool UseLineDirectives, bool MinimizeWhitespace,
-                           bool DirectivesOnly)
+                           bool DirectivesOnly, bool KeepSystemIncludes)
       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
         DisableLineMarkers(lineMarkers), DumpDefines(defines),
         DumpIncludeDirectives(DumpIncludeDirectives),
         UseLineDirectives(UseLineDirectives),
-        MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly) {
+        MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
+        KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
     CurLine = 0;
     CurFilename += "<uninit>";
     EmittedTokensOnThisLine = false;
@@ -118,6 +122,8 @@ class PrintPPOutputPPCallbacks : public PPCallbacks {
     FileType = SrcMgr::C_User;
     Initialized = false;
     IsFirstFileEntered = false;
+    if (KeepSystemIncludes)
+      NullOS = std::make_unique<llvm::raw_null_ostream>();
 
     PrevTok.startToken();
     PrevPrevTok.startToken();
@@ -350,6 +356,10 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
 
   CurLine = NewLine;
 
+  // In KeepSystemIncludes mode, redirect OS as needed.
+  if (KeepSystemIncludes && (isSystem(FileType) != isSystem(NewFileType)))
+    OS = isSystem(FileType) ? OrigOS : NullOS.get();
+
   CurFilename.clear();
   CurFilename += UserLoc.getFilename();
   FileType = NewFileType;
@@ -394,14 +404,16 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
     SrcMgr::CharacteristicKind FileType) {
   // In -dI mode, dump #include directives prior to dumping their content or
-  // interpretation.
-  if (DumpIncludeDirectives) {
+  // interpretation. Similar for -fkeep-system-includes.
+  if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(FileType))) {
     MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
     const std::string TokenText = PP.getSpelling(IncludeTok);
     assert(!TokenText.empty());
     *OS << "#" << TokenText << " "
         << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
-        << " /* clang -E -dI */";
+        << " /* clang -E "
+        << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes")
+        << " */";
     setEmittedDirectiveOnThisLine();
   }
 
@@ -412,7 +424,8 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
     case tok::pp_import:
     case tok::pp_include_next:
       MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
-      *OS << "#pragma clang module import " << Imported->getFullModuleName(true)
+      *OS << "#pragma clang module import "
+          << Imported->getFullModuleName(true)
           << " /* clang -E: implicit import for "
           << "#" << PP.getSpelling(IncludeTok) << " "
           << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
@@ -794,8 +807,7 @@ struct UnknownPragmaHandler : public PragmaHandler {
 
 
 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
-                                    PrintPPOutputPPCallbacks *Callbacks,
-                                    raw_ostream *OS) {
+                                    PrintPPOutputPPCallbacks *Callbacks) {
   bool DropComments = PP.getLangOpts().TraditionalCPP &&
                       !PP.getCommentRetentionState();
 
@@ -863,7 +875,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       // components. We don't have a good way to round-trip those.
       Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
       std::string Name = M->getFullModuleName();
-      OS->write(Name.data(), Name.size());
+      Callbacks->OS->write(Name.data(), Name.size());
       Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
     } else if (Tok.isAnnotation()) {
       // Ignore annotation tokens created by pragmas - the pragmas themselves
@@ -871,14 +883,14 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       PP.Lex(Tok);
       continue;
     } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
-      *OS << II->getName();
+      *Callbacks->OS << II->getName();
     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
                Tok.getLiteralData()) {
-      OS->write(Tok.getLiteralData(), Tok.getLength());
+      Callbacks->OS->write(Tok.getLiteralData(), Tok.getLength());
     } else if (Tok.getLength() < std::size(Buffer)) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
-      OS->write(TokPtr, Len);
+      Callbacks->OS->write(TokPtr, Len);
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
@@ -895,7 +907,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       }
     } else {
       std::string S = PP.getSpelling(Tok);
-      OS->write(S.data(), S.size());
+      Callbacks->OS->write(S.data(), S.size());
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
@@ -970,7 +982,8 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
       Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
-      Opts.MinimizeWhitespace, Opts.DirectivesOnly);
+      Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+      Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions.  The assumption is that
   // the majority of pragmas in such a file will be Microsoft pragmas.
@@ -1028,7 +1041,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
   } while (true);
 
   // Read all the preprocessed tokens, printing them out to the stream.
-  PrintPreprocessedTokens(PP, Tok, Callbacks, OS);
+  PrintPreprocessedTokens(PP, Tok, Callbacks);
   *OS << '\n';
 
   // Remove the handlers we just added to leave the preprocessor in a sane state
diff --git a/clang/test/Frontend/Inputs/dashE/dashE.h b/clang/test/Frontend/Inputs/dashE/dashE.h
new file mode 100644
index 000000000000000..c1a9df40e34ddde
--- /dev/null
+++ b/clang/test/Frontend/Inputs/dashE/dashE.h
@@ -0,0 +1,3 @@
+int dashE_1;
+#include <a.h>
+int dashE_2;
diff --git a/clang/test/Frontend/Inputs/dashE/sys/a.h b/clang/test/Frontend/Inputs/dashE/sys/a.h
new file mode 100644
index 000000000000000..55b2bcb4ccb4083
--- /dev/null
+++ b/clang/test/Frontend/Inputs/dashE/sys/a.h
@@ -0,0 +1,3 @@
+int a_1;
+#include <b.h>
+int a_2;
diff --git a/clang/test/Frontend/Inputs/dashE/sys/b.h b/clang/test/Frontend/Inputs/dashE/sys/b.h
new file mode 100644
index 000000000000000..67bdce58dfb976e
--- /dev/null
+++ b/clang/test/Frontend/Inputs/dashE/sys/b.h
@@ -0,0 +1 @@
+int b_1;
diff --git a/clang/test/Frontend/dashE-sysincludes.cpp b/clang/test/Frontend/dashE-sysincludes.cpp
new file mode 100644
index 000000000000000..db0c488d715775d
--- /dev/null
+++ b/clang/test/Frontend/dashE-sysincludes.cpp
@@ -0,0 +1,23 @@
+// RUN: mkdir %t.dir
+// RUN: %clang_cc1 -E -fkeep-system-includes -I %S/Inputs/dashE -isystem %S/Inputs/dashE/sys %s | FileCheck %s
+
+int main_1 = 1;
+#include <a.h>
+int main_2 = 1;
+#include "dashE.h"
+int main_3 = 1;
+
+// CHECK: main_1
+// CHECK: #include <a.h>
+// CHECK-NOT: a_1
+// CHECK-NOT: a_2
+// CHECK-NOT: b.h
+// CHECK: main_2
+// CHECK-NOT: #include "dashE.h"
+// CHECK: dashE_1
+// CHECK: #include <a.h>
+// CHECK-NOT: a_1
+// CHECK-NOT: a_2
+// CHECK-NOT: b.h
+// CHECK: dashE_2
+// CHECK: main_3
diff --git a/clang/test/Preprocessor/minimize-whitespace-messages.c b/clang/test/Preprocessor/minimize-whitespace-messages.c
index a78ddb471fb7c08..f930bbe9c257f2c 100644
--- a/clang/test/Preprocessor/minimize-whitespace-messages.c
+++ b/clang/test/Preprocessor/minimize-whitespace-messages.c
@@ -1,8 +1,11 @@
-// RUN: not %clang -c -fminimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=ON
-// ON: error: invalid argument '-fminimize-whitespace' only allowed with '-E'
+// RUN: not %clang -c -fminimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=ON -DOPT=-fminimize-whitespace
+// RUN: not %clang -c -fkeep-system-includes %s 2>&1 | FileCheck %s --check-prefix=ON -DOPT=-fkeep-system-includes
+// ON: error: invalid argument '[[OPT]]' only allowed with '-E'
 
-// RUN: not %clang -c -fno-minimize-whitespace %s 2>&1 | FileCheck %s  --check-prefix=OFF
-// OFF: error: invalid argument '-fno-minimize-whitespace' only allowed with '-E'
+// RUN: not %clang -c -fno-minimize-whitespace %s 2>&1 | FileCheck %s  --check-prefix=OFF -DOPT=-fno-minimize-whitespace
+// RUN: not %clang -c -fno-keep-system-includes %s 2>&1 | FileCheck %s  --check-prefix=OFF -DOPT=-fno-keep-system-includes
+// OFF: error: invalid argument '[[OPT]]' only allowed with '-E'
 
-// RUN: not %clang -E -fminimize-whitespace -x assembler-with-cpp %s 2>&1 | FileCheck %s --check-prefix=ASM
-// ASM: error: '-fminimize-whitespace' invalid for input of type assembler-with-cpp
+// RUN: not %clang -E -fminimize-whitespace -x assembler-with-cpp %s 2>&1 | FileCheck %s --check-prefix=ASM -DOPT=-fminimize-whitespace
+// RUN: not %clang -E -fkeep-system-includes -x assembler-with-cpp %s 2>&1 | FileCheck %s --check-prefix=ASM -DOPT=-fkeep-system-includes
+// ASM: error: '[[OPT]]' invalid for input of type assembler-with-cpp

>From a10c5e32d135f7bc456d6eb79f360f87378c270b Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul.robinson at sony.com>
Date: Fri, 29 Sep 2023 12:18:30 -0700
Subject: [PATCH 3/4] Add caveats to the help text.

---
 clang/include/clang/Driver/Options.td | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4a64aee64d3f06e..ea95f415c8aad2d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2488,7 +2488,11 @@ defm keep_system_includes : BoolFOption<"keep-system-includes",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Instead of expanding system headers when emitting preprocessor "
           "output, preserve the #include directive. Useful when producing "
-          "preprocessed output for test case reduction.\n\nOnly valid with -E.">,
+          "preprocessed output for test case reduction. May produce incorrect "
+          "output if preprocessor symbols that control the included content "
+          "(e.g. _XOPEN_SOURCE) are defined in the including source file. The "
+          "portability of the resulting source to other compilation environments "
+          "is not guaranteed.\n\nOnly valid with -E.">,
   NegFlag<SetFalse>>;
 
 def ffreestanding : Flag<["-"], "ffreestanding">, Group<f_Group>,

>From 6c8908db85fc53ef8208b9a1682df216e4fcbe88 Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul.robinson at sony.com>
Date: Fri, 29 Sep 2023 13:35:46 -0700
Subject: [PATCH 4/4] Add doc and release note

---
 clang/docs/CommandGuide/clang.rst | 15 +++++++++++++++
 clang/docs/ReleaseNotes.rst       |  4 ++++
 2 files changed, 19 insertions(+)

diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst
index 139c8f25137d3fb..e1c872cdc55396a 100644
--- a/clang/docs/CommandGuide/clang.rst
+++ b/clang/docs/CommandGuide/clang.rst
@@ -684,6 +684,21 @@ Preprocessor Options
 
   Do not search clang's builtin directory for include files.
 
+.. option:: -fkeep-system-includes
+
+  Usable only with :option:`-E`. Do not copy the preprocessed content of
+  "system" headers to the output; instead, preserve the ``#include`` directive.
+  This can greatly reduce the volume of text produced by :option:`-E` which
+  can be helpful when trying to produce a "small" reproducible test case.
+
+  This option does not guarantee reproducibility, however. If the including
+  source defines preprocessor symbols that influence the behavior of system
+  headers (for example, ``_XOPEN_SOURCE``) the operation of :option:`-E` will
+  remove that definition and thus can change the semantics of the included
+  header. Also, using a different version of the system headers (especially a
+  different version of the STL) may result in different behavior. Always verify
+  the preprocessed file by compiling it separately.
+
 
 ENVIRONMENT
 -----------
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 68172d5317a13ba..d13e4ac7e0fe5e8 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -141,6 +141,10 @@ Non-comprehensive list of changes in this release
 
 New Compiler Flags
 ------------------
+- ``-fkeep-system-includes`` modifies the behavior of the ``-E`` option,
+  preserving ``#include`` directives for "system" headers instead of copying
+  the preprocessed text to the output. This can greatly reduce the size of the
+  preprocessed output, which can be helpful when trying to reduce a test case.
 
 Deprecated Compiler Flags
 -------------------------



More information about the cfe-commits mailing list