[PATCH] D33765: Show correct column nr. when multi-byte utf8 chars are used.

Erik Verbruggen via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 30 03:26:04 PST 2017


erikjv updated this revision to Diff 124903.
erikjv added a comment.

I moved all of the code into TextDiagnostic, so all other interfaces still receive byte offsets.


https://reviews.llvm.org/D33765

Files:
  lib/Frontend/TextDiagnostic.cpp
  test/Misc/diag-utf8.cpp


Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,10 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+    "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "idez̈en" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:9:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}
Index: lib/Frontend/TextDiagnostic.cpp
===================================================================
--- lib/Frontend/TextDiagnostic.cpp
+++ lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Unicode.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -818,6 +819,28 @@
   if (DiagOpts->ShowColumn)
     // Compute the column number.
     if (unsigned ColNo = PLoc.getColumn()) {
+      // Correct the column number for multi-byte UTF-8 code-points.
+      bool Invalid = false;
+      StringRef BufData = Loc.getBufferData(&Invalid);
+      if (!Invalid) {
+        const char *BufStart = BufData.data();
+        const char *BufEnd = BufStart + BufData.size();
+
+        // Decompose the location into a FID/Offset pair.
+        std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+        FileID FID = LocInfo.first;
+        const SourceManager &SM = Loc.getManager();
+        const char *LineStart =
+            BufStart +
+            SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+        if (LineStart + ColNo < BufEnd) {
+          StringRef SourceLine(LineStart, ColNo);
+          int CorrectedColNo = llvm::sys::unicode::columnWidthUTF8(SourceLine);
+          if (CorrectedColNo != -1)
+            ColNo = unsigned(CorrectedColNo);
+        }
+      }
+
       if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) {
         OS << ',';
         // Visual Studio 2010 or earlier expects column number to be off by one


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33765.124903.patch
Type: text/x-patch
Size: 2301 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20171130/c2bc05e6/attachment-0001.bin>


More information about the cfe-commits mailing list