[clang] 6669dc0 - [clang][NFC] Refactor printableTextForNextCharacter

Timm Bäder via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 9 23:40:26 PDT 2023


Author: Timm Bäder
Date: 2023-06-10T07:51:36+02:00
New Revision: 6669dc09a441f27f3b5e18805a1a59fbe93a4cd6

URL: https://github.com/llvm/llvm-project/commit/6669dc09a441f27f3b5e18805a1a59fbe93a4cd6
DIFF: https://github.com/llvm/llvm-project/commit/6669dc09a441f27f3b5e18805a1a59fbe93a4cd6.diff

LOG: [clang][NFC] Refactor printableTextForNextCharacter

Differential Revision: https://reviews.llvm.org/D150843

Added: 
    

Modified: 
    clang/lib/Frontend/TextDiagnostic.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 3cdf86f5c8a6e..48082d7273e51 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -91,73 +91,78 @@ static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
 ///        printableTextForNextCharacter.
 ///
 /// \param SourceLine The line of source
-/// \param i Pointer to byte index,
+/// \param I Pointer to byte index,
 /// \param TabStop used to expand tabs
 /// \return pair(printable text, 'true' iff original text was printable)
 ///
 static std::pair<SmallString<16>, bool>
-printableTextForNextCharacter(StringRef SourceLine, size_t *i,
+printableTextForNextCharacter(StringRef SourceLine, size_t *I,
                               unsigned TabStop) {
-  assert(i && "i must not be null");
-  assert(*i<SourceLine.size() && "must point to a valid index");
+  assert(I && "I must not be null");
+  assert(*I < SourceLine.size() && "must point to a valid index");
 
-  if (SourceLine[*i]=='\t') {
+  if (SourceLine[*I] == '\t') {
     assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
            "Invalid -ftabstop value");
-    unsigned col = bytesSincePreviousTabOrLineBegin(SourceLine, *i);
-    unsigned NumSpaces = TabStop - col%TabStop;
+    unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
+    unsigned NumSpaces = TabStop - (Col % TabStop);
     assert(0 < NumSpaces && NumSpaces <= TabStop
            && "Invalid computation of space amt");
-    ++(*i);
+    ++(*I);
 
-    SmallString<16> expandedTab;
-    expandedTab.assign(NumSpaces, ' ');
-    return std::make_pair(expandedTab, true);
+    SmallString<16> ExpandedTab;
+    ExpandedTab.assign(NumSpaces, ' ');
+    return std::make_pair(ExpandedTab, true);
   }
 
-  unsigned char const *begin, *end;
-  begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i));
-  end = begin + (SourceLine.size() - *i);
-
-  if (llvm::isLegalUTF8Sequence(begin, end)) {
-    llvm::UTF32 c;
-    llvm::UTF32 *cptr = &c;
-    unsigned char const *original_begin = begin;
-    unsigned char const *cp_end =
-        begin + llvm::getNumBytesForUTF8(SourceLine[*i]);
-
-    llvm::ConversionResult res = llvm::ConvertUTF8toUTF32(
-        &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion);
-    (void)res;
-    assert(llvm::conversionOK == res);
-    assert(0 < begin-original_begin
-           && "we must be further along in the string now");
-    *i += begin-original_begin;
-
-    if (!llvm::sys::locale::isPrint(c)) {
-      // If next character is valid UTF-8, but not printable
-      SmallString<16> expandedCP("<U+>");
-      while (c) {
-        expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(c%16));
-        c/=16;
-      }
-      while (expandedCP.size() < 8)
-        expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(0));
-      return std::make_pair(expandedCP, false);
-    }
-
-    // If next character is valid UTF-8, and printable
-    return std::make_pair(SmallString<16>(original_begin, cp_end), true);
+  const unsigned char *Begin = SourceLine.bytes_begin() + *I;
 
+  // Fast path for the common ASCII case.
+  if (*Begin < 0x80 && llvm::sys::locale::isPrint(*Begin)) {
+    ++(*I);
+    return std::make_pair(SmallString<16>(Begin, Begin + 1), true);
+  }
+  unsigned CharSize = llvm::getNumBytesForUTF8(*Begin);
+  const unsigned char *End = Begin + CharSize;
+
+  // Convert it to UTF32 and check if it's printable.
+  if (End <= SourceLine.bytes_end() && llvm::isLegalUTF8Sequence(Begin, End)) {
+    llvm::UTF32 C;
+    llvm::UTF32 *CPtr = &C;
+
+    // Begin and end before conversion.
+    unsigned char const *OriginalBegin = Begin;
+    llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32(
+        &Begin, End, &CPtr, CPtr + 1, llvm::strictConversion);
+    (void)Res;
+    assert(Res == llvm::conversionOK);
+    assert(OriginalBegin < Begin);
+    assert((Begin - OriginalBegin) == CharSize);
+
+    (*I) += (Begin - OriginalBegin);
+
+    // Valid, multi-byte, printable UTF8 character.
+    if (llvm::sys::locale::isPrint(C))
+      return std::make_pair(SmallString<16>(OriginalBegin, End), true);
+
+    // Valid but not printable.
+    SmallString<16> Str("<U+>");
+    while (C) {
+      Str.insert(Str.begin() + 3, llvm::hexdigit(C % 16));
+      C /= 16;
+    }
+    while (Str.size() < 8)
+      Str.insert(Str.begin() + 3, llvm::hexdigit(0));
+    return std::make_pair(Str, false);
   }
 
-  // If next byte is not valid UTF-8 (and therefore not printable)
-  SmallString<16> expandedByte("<XX>");
-  unsigned char byte = SourceLine[*i];
-  expandedByte[1] = llvm::hexdigit(byte / 16);
-  expandedByte[2] = llvm::hexdigit(byte % 16);
-  ++(*i);
-  return std::make_pair(expandedByte, false);
+  // Otherwise, not printable since it's not valid UTF8.
+  SmallString<16> ExpandedByte("<XX>");
+  unsigned char Byte = SourceLine[*I];
+  ExpandedByte[1] = llvm::hexdigit(Byte / 16);
+  ExpandedByte[2] = llvm::hexdigit(Byte % 16);
+  ++(*I);
+  return std::make_pair(ExpandedByte, false);
 }
 
 static void expandTabs(std::string &SourceLine, unsigned TabStop) {


        


More information about the cfe-commits mailing list