[PATCH] D33765: Show correct column nr. when multi-byte utf8 chars are used.

David Blaikie via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 5 10:01:59 PDT 2017


Is it right to only change the behavior of this caller? Presumably other
callers (like getSpellingColumnNumber, getExpansionColumnNumber, etc)
probably want the same handling? Do any callers /not/ want this behavior?

On Thu, Jun 1, 2017 at 3:14 AM Erik Verbruggen via Phabricator via
cfe-commits <cfe-commits at lists.llvm.org> wrote:

> erikjv created this revision.
>
> Previously, the column number in a diagnostic would be the byte position
> in the line. This results in incorrect column numbers when a multi-byte
> UTF-8 character would be present in the input. By ignoring all bytes
> starting with 0b10.... the correct column number is created.
>
> This fixes PR21144.
>
>
> https://reviews.llvm.org/D33765
>
> Files:
>   include/clang/Basic/SourceManager.h
>   lib/Basic/SourceManager.cpp
>   test/Misc/diag-utf8.cpp
>
>
> Index: test/Misc/diag-utf8.cpp
> ===================================================================
> --- /dev/null
> +++ test/Misc/diag-utf8.cpp
> @@ -0,0 +1,11 @@
> +// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
> +
> +struct Foo { int member; };
> +
> +void f(Foo foo)
> +{
> +    "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error:
> invalid operands to binary expression ('const char *' and 'Foo')
> +    "ideƫen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error:
> invalid operands to binary expression ('const char *' and 'Foo')
> +}
> +
> +
> Index: lib/Basic/SourceManager.cpp
> ===================================================================
> --- lib/Basic/SourceManager.cpp
> +++ lib/Basic/SourceManager.cpp
> @@ -1055,11 +1055,22 @@
>    return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
>  }
>
> +static unsigned correctForMultiByteChars(const char *Buf, unsigned
> LineStart,
> +                                         unsigned Column)
> +{
> +    unsigned CorrectedColumn = Column;
> +    for (unsigned I = 0; I < Column; ++I) {
> +        if ((Buf[LineStart + I] & 0xc0) == 0x80)
> +            --CorrectedColumn;
> +    }
> +    return CorrectedColumn;
> +}
>
>  /// getColumnNumber - Return the column # for the specified file position.
>  /// this is significantly cheaper to compute than the line number.
>  unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
> -                                        bool *Invalid) const {
> +                                        bool *Invalid,
> +                                        bool BytePosition) const {
>    bool MyInvalid = false;
>    llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid);
>    if (Invalid)
> @@ -1093,14 +1104,18 @@
>          if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n')
>            --FilePos;
>        }
> -      return FilePos - LineStart + 1;
> +      unsigned Column = FilePos - LineStart + 1;
> +      return BytePosition ? Column : correctForMultiByteChars(Buf,
> LineStart,
> +                                                              Column);
>      }
>    }
>
>    unsigned LineStart = FilePos;
>    while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] !=
> '\r')
>      --LineStart;
> -  return FilePos-LineStart+1;
> +  unsigned Column = FilePos - LineStart + 1;
> +  return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart,
> +                                                          Column);
>  }
>
>  // isInvalid - Return the result of calling loc.isInvalid(), and
> @@ -1425,7 +1440,8 @@
>    unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second,
> &Invalid);
>    if (Invalid)
>      return PresumedLoc();
> -  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second,
> &Invalid);
> +  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second,
> &Invalid,
> +                                    false);
>    if (Invalid)
>      return PresumedLoc();
>
> Index: include/clang/Basic/SourceManager.h
> ===================================================================
> --- include/clang/Basic/SourceManager.h
> +++ include/clang/Basic/SourceManager.h
> @@ -1275,7 +1275,8 @@
>    /// on a file sloc, so you must choose a spelling or expansion location
>    /// before calling this method.
>    unsigned getColumnNumber(FileID FID, unsigned FilePos,
> -                           bool *Invalid = nullptr) const;
> +                           bool *Invalid = nullptr,
> +                           bool BytePosition = true) const;
>    unsigned getSpellingColumnNumber(SourceLocation Loc,
>                                     bool *Invalid = nullptr) const;
>    unsigned getExpansionColumnNumber(SourceLocation Loc,
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170605/1bc4d841/attachment.html>


More information about the cfe-commits mailing list