[cfe-commits] r160319 - in /cfe/trunk: docs/UsersManual.html lib/Frontend/TextDiagnostic.cpp test/FixIt/fixit-unicode.c

Mon Jul 16 13:52:13 PDT 2012

Author: jrose
Date: Mon Jul 16 15:52:12 2012
New Revision: 160319

URL: http://llvm.org/viewvc/llvm-project?rev=160319&view=rev
Log:
Don't crash when emitting fixits following Unicode characters.

This code is very sensitive to the difference between "columns" as printed
and "bytes" (SourceManager columns). All variables are now named explicitly
and our assumptions are (hopefully) documented as both comment and assertion.

Whether parseable fixits should use byte offsets or Unicode character counts
is pending discussion on the mailing list; currently the implementation uses
bytes (and has no problems on lines containing multibyte characters).
This has been added to the user manual.

<rdar://problem/11877454>

Modified:
    cfe/trunk/docs/UsersManual.html
    cfe/trunk/lib/Frontend/TextDiagnostic.cpp
    cfe/trunk/test/FixIt/fixit-unicode.c

Modified: cfe/trunk/docs/UsersManual.html
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/UsersManual.html?rev=160319&r1=160318&r2=160319&view=diff
==============================================================================

--- cfe/trunk/docs/UsersManual.html (original)
+++ cfe/trunk/docs/UsersManual.html Mon Jul 16 15:52:12 2012
@@ -229,6 +229,9 @@
 
 <p>When this is disabled, Clang will print "test.c:28: warning..." with no
 column number.</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
@@ -395,6 +398,9 @@
 </pre>
 
 <p>The {}'s are generated by -fdiagnostics-print-source-range-info.</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
@@ -415,6 +421,9 @@
 "\\"), tabs (as "\t"), newlines (as "\n"), double
 quotes(as "\"") and non-printable characters (as octal
 "\xxx").</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <dt id="opt_fno-elide-type">

Modified: cfe/trunk/lib/Frontend/TextDiagnostic.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/TextDiagnostic.cpp?rev=160319&r1=160318&r2=160319&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/TextDiagnostic.cpp (original)
+++ cfe/trunk/lib/Frontend/TextDiagnostic.cpp Mon Jul 16 15:52:12 2012
@@ -1124,7 +1124,7 @@
   std::string FixItInsertionLine;
   if (Hints.empty() || !DiagOpts.ShowFixits)
     return FixItInsertionLine;
-  unsigned PrevHintEnd = 0;
+  unsigned PrevHintEndCol = 0;
 
   for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
        I != E; ++I) {
@@ -1136,11 +1136,15 @@
       if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second)) {
         // Insert the new code into the line just below the code
         // that the user wrote.
-        unsigned HintColNo
+        // Note: When modifying this function, be very careful about what is a
+        // "column" (printed width, platform-dependent) and what is a
+        // "byte offset" (SourceManager "column").
+        unsigned HintByteOffset
           = SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
-        // hint must start inside the source or right at the end
-        assert(HintColNo<static_cast<unsigned>(map.bytes())+1);
-        HintColNo = map.byteToColumn(HintColNo);
+
+        // The hint must start inside the source or right at the end
+        assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1);
+        unsigned HintCol = map.byteToColumn(HintByteOffset);
 
         // If we inserted a long previous hint, push this one forwards, and add
         // an extra space to show that this is not part of the previous
@@ -1149,32 +1153,27 @@
         //
         // Note that if this hint is located immediately after the previous
         // hint, no space will be added, since the location is more important.
-        if (HintColNo < PrevHintEnd)
-          HintColNo = PrevHintEnd + 1;
-
-        // FIXME: if the fixit includes tabs or other characters that do not
-        //  take up a single column per byte when displayed then
-        //  I->CodeToInsert.size() is not a column number and we're mixing
-        //  units (columns + bytes). We should get printable versions
-        //  of each fixit before using them.
-        unsigned LastColumnModified
-          = HintColNo + I->CodeToInsert.size();
-
-        if (LastColumnModified <= static_cast<unsigned>(map.bytes())) {
-          // If we're right in the middle of a multibyte character skip to
-          // the end of it.
-          while (map.byteToColumn(LastColumnModified) == -1)
-            ++LastColumnModified;
-          LastColumnModified = map.byteToColumn(LastColumnModified);
-        }
+        if (HintCol < PrevHintEndCol)
+          HintCol = PrevHintEndCol + 1;
 
+        // FIXME: This function handles multibyte characters in the source, but
+        // not in the fixits. This assertion is intended to catch unintended
+        // use of multibyte characters in fixits. If we decide to do this, we'll
+        // have to track separate byte widths for the source and fixit lines.
+        assert((size_t)llvm::sys::locale::columnWidth(I->CodeToInsert) ==
+               I->CodeToInsert.size());
+
+        // This relies on one byte per column in our fixit hints.
+        // This should NOT use HintByteOffset, because the source might have
+        // Unicode characters in earlier columns.
+        unsigned LastColumnModified = HintCol + I->CodeToInsert.size();
         if (LastColumnModified > FixItInsertionLine.size())
           FixItInsertionLine.resize(LastColumnModified, ' ');
-        assert(HintColNo+I->CodeToInsert.size() <= FixItInsertionLine.size());
+
         std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(),
-                  FixItInsertionLine.begin() + HintColNo);
+                  FixItInsertionLine.begin() + HintCol);
 
-        PrevHintEnd = LastColumnModified;
+        PrevHintEndCol = LastColumnModified;
       } else {
         FixItInsertionLine.clear();
         break;

Modified: cfe/trunk/test/FixIt/fixit-unicode.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/FixIt/fixit-unicode.c?rev=160319&r1=160318&r2=160319&view=diff
==============================================================================
--- cfe/trunk/test/FixIt/fixit-unicode.c (original)
+++ cfe/trunk/test/FixIt/fixit-unicode.c Mon Jul 16 15:52:12 2012
@@ -1,10 +1,11 @@
 // RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s
-// PR13312
+// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck -check-prefix=CHECK-MACHINE %s
 
 struct Foo {
   int bar;
 };
 
+// PR13312
 void test1() {
   struct Foo foo;
   (&foo)â>bar = 42;
@@ -12,4 +13,19 @@
 // Make sure we emit the fixit right in front of the snowman.
 // CHECK: {{^        \^}}
 // CHECK: {{^        ;}}
+
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{11:9-11:9}:";"
+}
+
+
+int printf(const char *, ...);
+void test2() {
+  printf("â: %d", 1L);
+// CHECK: warning: format specifies type 'int' but the argument has type 'long'
+// Don't crash emitting a fixit after the delta.
+//     CHECK-NEXT:  printf("â: %d", 1L);
+// CHECK-NEXT: {{^             ~~   \^~}}
+// CHECK-NEXT: {{^             %ld}}
+
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{23:16-23:18}:"%ld"
 }