[PATCH] [2/6] Convert non-printing characters to their octal sequence before emitting #line directive or __FILE__ macro

Yunzhong Gao Yunzhong_Gao at playstation.sony.com
Wed Sep 11 10:51:15 PDT 2013


ygao added you to the CC list for the revision "[2/6] Convert non-printing characters to their octal sequence before emitting #line directive or __FILE__ macro".

Hi,
When clang generates a preprocessed file, it writes the current file name into
a #line directive or the expansion of the __FILE__ macro. But if the file name
contains escaped octal sequences, clang converts them into the actual
characters, which may not be valid UTF-8, and that triggers diagnostics like this:
```
/* test.c */
#line 5 "\202\261\202\361\202\311\202\277\202\315.c"
/* end of test.c */
```
$ clang -S -save-temps test.c
test.c:1:6: warning: illegal character encoding in string literal [-Winvalid-source-encoding]

Clang does not really have to convert the characters; it could simply keep
the escaped sequences in the preprocessed output. The proposed patch converts
any non-printing characters to their corresponding escaped octal sequences
before printing them out. This is part of a bigger effort to support
foreign characters in file names.

Could someone take a look and check whether the proposed patch is good to go in?

Many thanks,
- Gao

http://llvm-reviews.chandlerc.com/D1291

Files:
  include/clang/Lex/Lexer.h
  lib/Frontend/PrintPreprocessedOutput.cpp
  lib/Lex/Lexer.cpp
  lib/Lex/PPMacroExpansion.cpp
  test/Preprocessor/line-directive-output.c

Index: include/clang/Lex/Lexer.h
===================================================================
--- include/clang/Lex/Lexer.h
+++ include/clang/Lex/Lexer.h
@@ -236,6 +236,11 @@
   /// and " characters.  This does not add surrounding ""'s to the string.
   static void Stringify(SmallVectorImpl<char> &Str);
 
+  /// StringifyWithAddedEscape - Convert the specified string into a C string
+  /// and convert any non-printable characters to escaped octal sequence. This
+  /// does not add surrounding quotes to the string.
+  static void StringifyWithAddedEscape(SmallVectorImpl<char> &Str);
+
   
   /// getSpelling - This method is used to get the spelling of a token into a
   /// preallocated buffer, instead of as an std::string.  The caller is required
Index: lib/Frontend/PrintPreprocessedOutput.cpp
===================================================================
--- lib/Frontend/PrintPreprocessedOutput.cpp
+++ lib/Frontend/PrintPreprocessedOutput.cpp
@@ -285,7 +285,7 @@
 
   CurFilename.clear();
   CurFilename += UserLoc.getFilename();
-  Lexer::Stringify(CurFilename);
+  Lexer::StringifyWithAddedEscape(CurFilename);
   FileType = NewFileType;
 
   if (DisableLineMarkers) {
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -236,6 +236,41 @@
   }
 }
 
+// Return the ASCII octal digit ('0'-'7') for the low three bits of X.
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+/// StringifyWithAddedEscape - Convert the specified string into a C string and
+/// convert any non-printing characters to escaped octal sequence. This does
+/// not add surrounding quotes to the string.
+void Lexer::StringifyWithAddedEscape(SmallVectorImpl<char> &Str) {
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    unsigned char C = Str[i];
+    if (C == '\\' || C == '"') {
+      Str.insert(Str.begin()+i, '\\');
+      ++i; ++e;
+      continue;
+    }
+
+    if (isprint((unsigned)C))
+      continue;
+
+    switch (C) {
+      case '\b': Str[i]='b'; Str.insert(Str.begin()+i, '\\'); ++i; ++e; break;
+      case '\f': Str[i]='f'; Str.insert(Str.begin()+i, '\\'); ++i; ++e; break;
+      case '\n': Str[i]='n'; Str.insert(Str.begin()+i, '\\'); ++i; ++e; break;
+      case '\r': Str[i]='r'; Str.insert(Str.begin()+i, '\\'); ++i; ++e; break;
+      case '\t': Str[i]='t'; Str.insert(Str.begin()+i, '\\'); ++i; ++e; break;
+      default:
+        Str[i] = '\\';
+        Str.insert(Str.begin() + i + 1, toOctal(C >> 6));
+        Str.insert(Str.begin() + i + 2, toOctal(C >> 3));
+        Str.insert(Str.begin() + i + 3, toOctal(C >> 0));
+        i += 3; e += 3;
+        break;
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Token Spelling
 //===----------------------------------------------------------------------===//
Index: lib/Lex/PPMacroExpansion.cpp
===================================================================
--- lib/Lex/PPMacroExpansion.cpp
+++ lib/Lex/PPMacroExpansion.cpp
@@ -1316,7 +1316,7 @@
     SmallString<128> FN;
     if (PLoc.isValid()) {
       FN += PLoc.getFilename();
-      Lexer::Stringify(FN);
+      Lexer::StringifyWithAddedEscape(FN);
       OS << '"' << FN.str() << '"';
     }
     Tok.setKind(tok::string_literal);
Index: test/Preprocessor/line-directive-output.c
===================================================================
--- test/Preprocessor/line-directive-output.c
+++ test/Preprocessor/line-directive-output.c
@@ -73,3 +73,8 @@
 # 42 "A.c"
 # 44 "A.c"
 # 49 "A.c"
+
+// CHECK: # 100 "\202\261\202\361\202\311\202\277\202\315.c"
+// CHECK: filename = "\202\261\202\361\202\311\202\277\202\315.c";
+# 100 "\202\261\202\361\202\311\202\277\202\315.c"
+const char *filename = __FILE__;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1291.2.patch
Type: text/x-patch
Size: 3805 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130911/5bea5756/attachment.bin>


More information about the cfe-commits mailing list