[cfe-commits] r163820 - in /cfe/trunk: lib/Frontend/TextDiagnostic.cpp test/Misc/unprintable.c

Thu Sep 13 11:37:50 PDT 2012

Author: rsmith
Date: Thu Sep 13 13:37:50 2012
New Revision: 163820

URL: http://llvm.org/viewvc/llvm-project?rev=163820&view=rev
Log:
Make TextDiagnostic more robust against SourceLocations which point into the
middle of UTF-8 characters, and avoid walking to such positions when adjusting
column ranges for display. Fixes a couple of hangs when rendering diagnostics.

Modified:
    cfe/trunk/lib/Frontend/TextDiagnostic.cpp
    cfe/trunk/test/Misc/unprintable.c

Modified: cfe/trunk/lib/Frontend/TextDiagnostic.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/TextDiagnostic.cpp?rev=163820&r1=163819&r2=163820&view=diff
==============================================================================

--- cfe/trunk/lib/Frontend/TextDiagnostic.cpp (original)
+++ cfe/trunk/lib/Frontend/TextDiagnostic.cpp Thu Sep 13 13:37:50 2012
@@ -274,14 +274,44 @@
   }
   int columns() const { return m_byteToColumn.back(); }
   int bytes() const { return m_columnToByte.back(); }
+
+  /// \brief Map a byte to the column which it is at the start of, or return -1
+  /// if it is not at the start of a column (for a UTF-8 trailing byte).
   int byteToColumn(int n) const {
     assert(0<=n && n<static_cast<int>(m_byteToColumn.size()));
     return m_byteToColumn[n];
   }
+
+  /// \brief Map a byte to the first column which contains it.
+  int byteToContainingColumn(int N) const {
+    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size()));
+    while (m_byteToColumn[N] == -1)
+      --N;
+    return m_byteToColumn[N];
+  }
+
+  /// \brief Map a column to the byte which starts the column, or return -1 if
+  /// the column is the second or subsequent column of an expanded tab or similar
+  /// multi-column entity.
   int columnToByte(int n) const {
     assert(0<=n && n<static_cast<int>(m_columnToByte.size()));
     return m_columnToByte[n];
   }
+
+  /// \brief Map from a byte index to the next byte which starts a column.
+  int startOfNextColumn(int N) const {
+    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
+    while (byteToColumn(++N) == -1) {}
+    return N;
+  }
+
+  /// \brief Map from a byte index to the previous byte which starts a column.
+  int startOfPreviousColumn(int N) const {
+    assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
+    while (byteToColumn(--N) == -1) {} // pre-decrement: test the new N
+    return N;
+  }
+
   StringRef getSourceLine() const {
     return m_SourceLine;
   }
@@ -402,21 +432,20 @@
 
       // Skip over any whitespace we see here; we're looking for
       // another bit of interesting text.
+      // FIXME: Detect non-ASCII whitespace characters too.
       while (NewStart &&
-             (map.byteToColumn(NewStart)==-1 ||
-             isspace(static_cast<unsigned char>(SourceLine[NewStart]))))
-        --NewStart;
+             isspace(static_cast<unsigned char>(SourceLine[NewStart])))
+        NewStart = map.startOfPreviousColumn(NewStart);
 
       // Skip over this bit of "interesting" text.
-      while (NewStart &&
-             (map.byteToColumn(NewStart)!=-1 &&
-             !isspace(static_cast<unsigned char>(SourceLine[NewStart]))))
-        --NewStart;
-
-      // Move up to the non-whitespace character we just saw.
-      if (NewStart)
-        ++NewStart;
+      while (NewStart) {
+        unsigned Prev = map.startOfPreviousColumn(NewStart);
+        if (isspace(static_cast<unsigned char>(SourceLine[Prev])))
+          break;
+        NewStart = Prev;
+      }
 
+      assert(map.byteToColumn(NewStart) != -1);
       unsigned NewColumns = map.byteToColumn(SourceEnd) -
                               map.byteToColumn(NewStart);
       if (NewColumns <= TargetColumns) {
@@ -430,17 +459,17 @@
 
       // Skip over any whitespace we see here; we're looking for
       // another bit of interesting text.
-      while (NewEnd<SourceLine.size() &&
-             (map.byteToColumn(NewEnd)==-1 ||
-             isspace(static_cast<unsigned char>(SourceLine[NewEnd]))))
-        ++NewEnd;
+      // FIXME: Detect non-ASCII whitespace characters too.
+      while (NewEnd < SourceLine.size() &&
+             isspace(static_cast<unsigned char>(SourceLine[NewEnd])))
+        NewEnd = map.startOfNextColumn(NewEnd);
 
       // Skip over this bit of "interesting" text.
-      while (NewEnd<SourceLine.size() &&
-             (map.byteToColumn(NewEnd)!=-1 &&
-             !isspace(static_cast<unsigned char>(SourceLine[NewEnd]))))
-        ++NewEnd;
+      while (NewEnd < SourceLine.size() &&
+             !isspace(static_cast<unsigned char>(SourceLine[NewEnd])))
+        NewEnd = map.startOfNextColumn(NewEnd);
 
+      assert(map.byteToColumn(NewEnd) != -1);
       unsigned NewColumns = map.byteToColumn(NewEnd) -
                               map.byteToColumn(SourceStart);
       if (NewColumns <= TargetColumns) {
@@ -933,7 +962,7 @@
     highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM);
 
   // Next, insert the caret itself.
-  ColNo = sourceColMap.byteToColumn(ColNo-1);
+  ColNo = sourceColMap.byteToContainingColumn(ColNo-1);
   if (CaretLine.size()<ColNo+1)
     CaretLine.resize(ColNo+1, ' ');
   CaretLine[ColNo] = '^';
@@ -1080,7 +1109,7 @@
     while (StartColNo < map.getSourceLine().size() &&
            (map.getSourceLine()[StartColNo] == ' ' ||
             map.getSourceLine()[StartColNo] == '\t'))
-      ++StartColNo;
+      StartColNo = map.startOfNextColumn(StartColNo);
 
     // Pick the last non-whitespace column.
     if (EndColNo > map.getSourceLine().size())
@@ -1088,7 +1117,7 @@
     while (EndColNo-1 &&
            (map.getSourceLine()[EndColNo-1] == ' ' ||
             map.getSourceLine()[EndColNo-1] == '\t'))
-      --EndColNo;
+      EndColNo = map.startOfPreviousColumn(EndColNo);
 
     // If the start/end passed each other, then we are trying to highlight a
     // range that just exists in whitespace, which must be some sort of other
@@ -1100,8 +1129,8 @@
   assert(EndColNo <= map.getSourceLine().size() && "Invalid range!");
 
   // Fill the range with ~'s.
-  StartColNo = map.byteToColumn(StartColNo);
-  EndColNo = map.byteToColumn(EndColNo);
+  StartColNo = map.byteToContainingColumn(StartColNo);
+  EndColNo = map.byteToContainingColumn(EndColNo);
 
   assert(StartColNo <= EndColNo && "Invalid range!");
   if (CaretLine.size() < EndColNo)
@@ -1139,7 +1168,7 @@
 
         // The hint must start inside the source or right at the end
         assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1);
-        unsigned HintCol = map.byteToColumn(HintByteOffset);
+        unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
 
         // If we inserted a long previous hint, push this one forwards, and add
         // an extra space to show that this is not part of the previous

Modified: cfe/trunk/test/Misc/unprintable.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/unprintable.c?rev=163820&r1=163819&r2=163820&view=diff
==============================================================================
--- cfe/trunk/test/Misc/unprintable.c (original)
+++ cfe/trunk/test/Misc/unprintable.c Thu Sep 13 13:37:50 2012
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s 2>&1 | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 %s -fmessage-length 40 2>&1 | FileCheck -strict-whitespace %s
 
 int main() {
     int i;
@@ -9,8 +9,25 @@
 // CHECK: {{^        ~\^~~~~~~~~~~~~~~~}}
 // CHECK: {{^       ~ \^               ~}}
 
+    (void)"ÊÑ¿ô";
+
+// CHECK: {{^    \(void\)"<CA><U\+047F><F4>";}}
+// CHECK: {{^           \^~~~}}
+
+Â  int n = 0;
+
+// CHECK: {{^<U\+00A0> int n = 0;}}
+// CHECK: {{^\^}}
+
+   "ð¿                                                              \z";
+
+// CHECK: {{^  \.\.\.\\z";}}
+// CHECK: {{^     \^~}}
+
+
     /* ð¿ */ "ð¿berhund";
 
 // CHECK: {{^    /\* <U\+1F47F> \*/ "<U\+1F47F>berhund";}}
 // CHECK: {{^                    \^~~~~~~~~~~~~~~~~~}}
-}
\ No newline at end of file
+
+}