[PATCH] D33765: Show correct column nr. when multi-byte utf8 chars are used.
Erik Verbruggen via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 1 03:14:23 PDT 2017
erikjv created this revision.
Previously, the column number in a diagnostic would be the byte position
in the line. This results in incorrect column numbers when a multi-byte
UTF-8 character would be present in the input. By ignoring all bytes
starting with 0b10.... the correct column number is created.
This fixes PR21144.
https://reviews.llvm.org/D33765
Files:
include/clang/Basic/SourceManager.h
lib/Basic/SourceManager.cpp
test/Misc/diag-utf8.cpp
Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,11 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+ "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+ "ideƫen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}
+
+
Index: lib/Basic/SourceManager.cpp
===================================================================
--- lib/Basic/SourceManager.cpp
+++ lib/Basic/SourceManager.cpp
@@ -1055,11 +1055,22 @@
return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
}
+static unsigned correctForMultiByteChars(const char *Buf, unsigned LineStart,
+ unsigned Column)
+{
+ unsigned CorrectedColumn = Column;
+ for (unsigned I = 0; I < Column; ++I) {
+ if ((Buf[LineStart + I] & 0xc0) == 0x80)
+ --CorrectedColumn;
+ }
+ return CorrectedColumn;
+}
/// getColumnNumber - Return the column # for the specified file position.
/// this is significantly cheaper to compute than the line number.
unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
- bool *Invalid) const {
+ bool *Invalid,
+ bool BytePosition) const {
bool MyInvalid = false;
llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid);
if (Invalid)
@@ -1093,14 +1104,18 @@
if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n')
--FilePos;
}
- return FilePos - LineStart + 1;
+ unsigned Column = FilePos - LineStart + 1;
+ return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart,
+ Column);
}
}
unsigned LineStart = FilePos;
while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
--LineStart;
- return FilePos-LineStart+1;
+ unsigned Column = FilePos - LineStart + 1;
+ return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart,
+ Column);
}
// isInvalid - Return the result of calling loc.isInvalid(), and
@@ -1425,7 +1440,8 @@
unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid);
if (Invalid)
return PresumedLoc();
- unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid);
+ unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid,
+ false);
if (Invalid)
return PresumedLoc();
Index: include/clang/Basic/SourceManager.h
===================================================================
--- include/clang/Basic/SourceManager.h
+++ include/clang/Basic/SourceManager.h
@@ -1275,7 +1275,8 @@
/// on a file sloc, so you must choose a spelling or expansion location
/// before calling this method.
unsigned getColumnNumber(FileID FID, unsigned FilePos,
- bool *Invalid = nullptr) const;
+ bool *Invalid = nullptr,
+ bool BytePosition = true) const;
unsigned getSpellingColumnNumber(SourceLocation Loc,
bool *Invalid = nullptr) const;
unsigned getExpansionColumnNumber(SourceLocation Loc,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33765.100992.patch
Type: text/x-patch
Size: 3599 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170601/960df478/attachment.bin>
More information about the cfe-commits
mailing list