[PATCH] Don't warn about Unicode characters in -E mode

Tue Jan 29 14:29:01 PST 2013

Hi rsmith,

People use the C preprocessor for things other than C files. Some of them have Unicode characters. We shouldn't warn about Unicode characters appearing outside of identifiers in this case.c

There's not currently a way for the preprocessor to //tell// if it's in -E mode, so I added a new flag, derived from the PreprocessorOutputOptions.

http://llvm-reviews.chandlerc.com/D346

Files:
  include/clang/Frontend/PreprocessorOutputOptions.h
  include/clang/Lex/Preprocessor.h
  lib/Frontend/CompilerInstance.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Lex/Lexer.cpp
  test/Lexer/unicode.c

Index: include/clang/Frontend/PreprocessorOutputOptions.h
===================================================================

--- include/clang/Frontend/PreprocessorOutputOptions.h
+++ include/clang/Frontend/PreprocessorOutputOptions.h
@@ -25,7 +25,7 @@
 
 public:
   PreprocessorOutputOptions() {
-    ShowCPP = 1;
+    ShowCPP = 0;
     ShowComments = 0;
     ShowLineMarkers = 1;
     ShowMacroComments = 0;
Index: include/clang/Lex/Preprocessor.h
===================================================================
--- include/clang/Lex/Preprocessor.h
+++ include/clang/Lex/Preprocessor.h
@@ -160,6 +160,9 @@
   /// \brief True if pragmas are enabled.
   bool PragmasEnabled : 1;
 
+  /// \brief True if the current build action is a preprocessing action.
+  bool PreprocessedOutput : 1;
+
   /// \brief True if we are currently preprocessing a #if or #elif directive
   bool ParsingIfOrElifDirective;
 
@@ -474,6 +477,16 @@
     return SuppressIncludeNotFoundError;
   }
 
+  /// Sets whether the preprocessor is responsible for producing output or if
+  /// it is producing tokens to be consumed by Parse and Sema.
+  void setPreprocessedOutput(bool IsPreprocessedOutput) {
+    PreprocessedOutput = IsPreprocessedOutput;
+  }
+
+  /// Returns true if the preprocessor is responsible for generating output,
+  /// false if it is producing tokens to be consumed by Parse and Sema.
+  bool isPreprocessedOutput() const { return PreprocessedOutput; }
+
   /// isCurrentLexer - Return true if we are lexing directly from the specified
   /// lexer.
   bool isCurrentLexer(const PreprocessorLexer *L) const {
Index: lib/Frontend/CompilerInstance.cpp
===================================================================
--- lib/Frontend/CompilerInstance.cpp
+++ lib/Frontend/CompilerInstance.cpp
@@ -243,6 +243,8 @@
 
   InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts());
 
+  PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP);
+
   // Set up the module path, including the hash for the
   // module-creation options.
   SmallString<256> SpecificModuleCache(
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1391,9 +1391,48 @@
 }
 
 static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
-                                        ArgList &Args) {
+                                        ArgList &Args,
+                                        frontend::ActionKind Action) {
   using namespace options;
-  Opts.ShowCPP = !Args.hasArg(OPT_dM);
+
+  switch (Action) {
+  case frontend::ASTDeclList:
+  case frontend::ASTDump:
+  case frontend::ASTDumpXML:
+  case frontend::ASTPrint:
+  case frontend::ASTView:
+  case frontend::EmitAssembly:
+  case frontend::EmitBC:
+  case frontend::EmitHTML:
+  case frontend::EmitLLVM:
+  case frontend::EmitLLVMOnly:
+  case frontend::EmitCodeGenOnly:
+  case frontend::EmitObj:
+  case frontend::FixIt:
+  case frontend::GenerateModule:
+  case frontend::GeneratePCH:
+  case frontend::GeneratePTH:
+  case frontend::ParseSyntaxOnly:
+  case frontend::PluginAction:
+  case frontend::PrintDeclContext:
+  case frontend::RewriteObjC:
+  case frontend::RewriteTest:
+  case frontend::RunAnalysis:
+  case frontend::MigrateSource:
+    Opts.ShowCPP = 0;
+    break;
+
+  case frontend::DumpRawTokens:
+  case frontend::DumpTokens:
+  case frontend::InitOnly:
+  case frontend::PrintPreamble:
+  case frontend::PrintPreprocessedInput:
+  case frontend::RewriteMacros:
+  case frontend::RunPreprocessorOnly:
+    Opts.ShowCPP = !Args.hasArg(OPT_dM);
+    break;
+  }
+
   Opts.ShowComments = Args.hasArg(OPT_C);
   Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
   Opts.ShowMacroComments = Args.hasArg(OPT_CC);
@@ -1474,7 +1513,8 @@
   // parameters from the function and the "FileManager.h" #include.
   FileManager FileMgr(Res.getFileSystemOpts());
   ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
-  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args);
+  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args,
+                              Res.getFrontendOpts().ProgramAction);
   ParseTargetArgs(Res.getTargetOpts(), *Args);
 
   return Success;
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -2812,7 +2812,7 @@
 
 void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
   if (isUnicodeWhitespace(C)) {
-    if (!isLexingRawMode()) {
+    if (!isLexingRawMode() && !PP->isPreprocessedOutput()) {
       CharSourceRange CharRange =
         CharSourceRange::getCharRange(getSourceLocation(),
                                       getSourceLocation(CurPtr));
@@ -2832,7 +2832,8 @@
     return LexIdentifier(Result, CurPtr);
   }
 
-  if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
+  if (!isASCII(*BufferPtr) && !isAllowedIDChar(C) &&
+      (isLexingRawMode() || !PP->isPreprocessedOutput())) {
     // Non-ASCII characters tend to creep into source code unintentionally.
     // Instead of letting the parser complain about the unknown token,
     // just drop the character.
Index: test/Lexer/unicode.c
===================================================================
--- test/Lexer/unicode.c
+++ test/Lexer/unicode.c
@@ -1,6 +1,12 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -E -DPP_ONLY=1 %s -o %t
+// RUN: FileCheck --input-file=%t %s
 
 // This file contains Unicode characters; please do not "fix" them!
 
 extern int x; // expected-warning {{treating Unicode character as whitespace}}
 extern int　x; // expected-warning {{treating Unicode character as whitespace}}
+
+#if PP_ONLY
+CHECK: The preprocessor should not complain about Unicode characters like ©.
+#endif
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D346.1.patch
Type: text/x-patch
Size: 5915 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130129/71d09e56/attachment.bin>