[PATCH] Frontend: Fix SourceColumnMap assertion failure on non-ascii characters.

Logan Chien tzuhsiang.chien at gmail.com
Sat Dec 20 01:17:56 PST 2014


Hi rsmith,

http://reviews.llvm.org/D6746

Files:
  lib/Frontend/TextDiagnostic.cpp
  test/Frontend/source-col-map.c

Index: lib/Frontend/TextDiagnostic.cpp
===================================================================
--- lib/Frontend/TextDiagnostic.cpp
+++ lib/Frontend/TextDiagnostic.cpp
@@ -293,14 +293,14 @@
 
   /// \brief Map from a byte index to the next byte which starts a column.
   int startOfNextColumn(int N) const {
-    assert(0 <= N && N < static_cast<int>(m_columnToByte.size() - 1));
+    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
     while (byteToColumn(++N) == -1) {}
     return N;
   }
 
   /// \brief Map from a byte index to the previous byte which starts a column.
   int startOfPreviousColumn(int N) const {
-    assert(0 < N && N < static_cast<int>(m_columnToByte.size()));
+    assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
     while (byteToColumn(--N) == -1) {}
     return N;
   }
@@ -323,9 +323,10 @@
                                           std::string &FixItInsertionLine,
                                           unsigned Columns,
                                           const SourceColumnMap &map) {
-  unsigned MaxColumns = std::max<unsigned>(map.columns(),
-                                           std::max(CaretLine.size(),
-                                                    FixItInsertionLine.size()));
+  unsigned CaretColumns = CaretLine.size();
+  unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine);
+  unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()),
+                                 std::max(CaretColumns, FixItColumns));
   // if the number of columns is less than the desired number we're done
   if (MaxColumns <= Columns)
     return;
@@ -1110,12 +1111,13 @@
   // Copy the line of code into an std::string for ease of manipulation.
   std::string SourceLine(LineStart, LineEnd);
 
-  // Create a line for the caret that is filled with spaces that is the same
-  // length as the line of source code.
-  std::string CaretLine(LineEnd-LineStart, ' ');
-
+  // Build the byte to column map.
   const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
 
+  // Create a line for the caret that is filled with spaces that is the same
+  // number of columns as the line of source code.
+  std::string CaretLine(sourceColMap.columns(), ' ');
+
   // Highlight all of the characters covered by Ranges with ~ characters.
   for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
                                                   E = Ranges.end();
Index: test/Frontend/source-col-map.c
===================================================================
--- /dev/null
+++ test/Frontend/source-col-map.c
@@ -0,0 +1,41 @@
+// RUN: not %clang_cc1 %s -fsyntax-only -fmessage-length 75 -o /dev/null 2>&1 | FileCheck %s -strict-whitespace
+
+// Test case for the text diagnostics source column conversion crash.
+
+// There are two problems to be tested:
+//
+// 1. The assertion in startOfNextColumn() and startOfPreviousColumn()
+//    should not be raised when the byte index is greater than the column index
+//    since the non-ascii characters may use more than one bytes to store a
+//    character in a column.
+//
+// 2. The length of the caret line should be equal to the number of columns of
+//    source line, instead of the length of the source line.  Otherwise, the
+//    assertion in selectInterestingSourceRegion will be raised because the
+//    removed columns plus the kept columns are not greater than the max column,
+//    which means that we should not remove any column at all.
+
+// NOTE: This file is encoded in UTF-8 and intentionally contains some
+// non-ASCII characters.
+
+__attribute__((format(printf, 1, 2)))
+extern int printf(const char *fmt, ...);
+
+void test1(Unknown* b);  // αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα
+// CHECK: unknown type name 'Unknown'
+// CHECK-NEXT: void test1(Unknown* b);  // αααα αααα αααα αααα αααα αααα αααα ααα...
+// CHECK-NEXT: {{^           \^$}}
+
+void test2(Unknown* b);  // αααα αααα αααα αααα αααα αααα αααα αααα αααα
+
+// CHECK: unknown type name 'Unknown'
+// CHECK-NEXT: void test2(Unknown* b);  // αααα αααα αααα αααα αααα αααα αααα αααα αααα
+// CHECK-NEXT: {{^           \^$}}
+
+void test3() {
+   /* αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα */ printf("%d", "s");
+}
+// CHECK:       format specifies type 'int' but the argument has type 'char *'
+// CHECK-NEXT:   ...αααα αααα αααα αααα αααα αααα αααα αααα αααα */ printf("%d", "s");
+// CHECK-NEXT: {{^                                                             ~~   \^~~$}}
+// CHECK-NEXT: {{^                                                             %s$}}

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D6746.17529.patch
Type: text/x-patch
Size: 4892 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20141220/9b7431cf/attachment.bin>


More information about the cfe-commits mailing list