[PATCH] D92180: Speedup some unicode rendering
serge via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 3 11:11:41 PST 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9501419e879e: Speedup some unicode rendering (authored by serge-sans-paille).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D92180/new/
https://reviews.llvm.org/D92180
Files:
llvm/lib/Support/Unicode.cpp
llvm/unittests/Support/UnicodeTest.cpp
Index: llvm/unittests/Support/UnicodeTest.cpp
===================================================================
--- llvm/unittests/Support/UnicodeTest.cpp
+++ llvm/unittests/Support/UnicodeTest.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Unicode.h"
+#include "llvm/Support/ConvertUTF.h"
#include "gtest/gtest.h"
namespace llvm {
@@ -23,6 +24,7 @@
EXPECT_EQ(6, columnWidthUTF8("abcdef"));
EXPECT_EQ(-1, columnWidthUTF8("\x01"));
+ EXPECT_EQ(-1, columnWidthUTF8("\t"));
EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01"));
EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE
@@ -84,6 +86,19 @@
EXPECT_TRUE(isPrintable(0x20000)); // CJK UNIFIED IDEOGRAPH-20000
EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter
+
+ // test the validity of a fast path in columnWidthUTF8
+ for (unsigned char c = 0; c < 128; ++c) {
+ const UTF8 buf8[2] = {c, 0};
+ const UTF8 *Target8 = &buf8[0];
+ UTF32 buf32[1];
+ UTF32 *Target32 = &buf32[0];
+ auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32,
+ Target32 + 1, strictConversion);
+ EXPECT_TRUE(status == conversionOK);
+ EXPECT_TRUE((columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1) ==
+ (bool)isPrintable(buf32[0]));
+ }
}
} // namespace
Index: llvm/lib/Support/Unicode.cpp
===================================================================
--- llvm/lib/Support/Unicode.cpp
+++ llvm/lib/Support/Unicode.cpp
@@ -339,11 +339,22 @@
return 1;
}
+static bool isprintableascii(char c) { return c > 31 && c < 127; }
+
int columnWidthUTF8(StringRef Text) {
unsigned ColumnWidth = 0;
unsigned Length;
for (size_t i = 0, e = Text.size(); i < e; i += Length) {
Length = getNumBytesForUTF8(Text[i]);
+
+ // fast path for ASCII characters
+ if (Length == 1) {
+ if (!isprintableascii(Text[i]))
+ return ErrorNonPrintableCharacter;
+ ColumnWidth += 1;
+ continue;
+ }
+
if (Length <= 0 || i + Length > Text.size())
return ErrorInvalidUTF8;
UTF32 buf[1];
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D92180.309320.patch
Type: text/x-patch
Size: 2187 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201203/e8e88151/attachment.bin>
More information about the llvm-commits
mailing list