[PATCH] D141392: Avoid u8"" literals in tests, their type changes in C++20
Jens Massberg via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 01:09:32 PST 2023
massberg updated this revision to Diff 488522.
massberg added a comment.
Fixed nits. Thanks for the comments!
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D141392/new/
https://reviews.llvm.org/D141392
Files:
llvm/include/llvm/Support/raw_ostream.h
llvm/unittests/Support/formatted_raw_ostream_test.cpp
Index: llvm/unittests/Support/formatted_raw_ostream_test.cpp
===================================================================
--- llvm/unittests/Support/formatted_raw_ostream_test.cpp
+++ llvm/unittests/Support/formatted_raw_ostream_test.cpp
@@ -92,34 +92,34 @@
formatted_raw_ostream C(B);
// U+00A0 Non-breaking space: encoded as two bytes, but only one column wide.
- C << u8"\u00a0";
+ C << "\xc2\xa0";
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(1U, C.getColumn());
EXPECT_EQ(2U, C.GetNumBytesInBuffer());
// U+2468 CIRCLED DIGIT NINE: encoded as three bytes, but only one column
// wide.
- C << u8"\u2468";
+ C << reinterpret_cast<const char *>(u8"\u2468");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(2U, C.getColumn());
EXPECT_EQ(5U, C.GetNumBytesInBuffer());
// U+00010000 LINEAR B SYLLABLE B008 A: encoded as four bytes, but only one
// column wide.
- C << u8"\U00010000";
+ C << reinterpret_cast<const char *>(u8"\U00010000");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(3U, C.getColumn());
EXPECT_EQ(9U, C.GetNumBytesInBuffer());
// U+55B5, CJK character, encodes as three bytes, takes up two columns.
- C << u8"\u55b5";
+ C << reinterpret_cast<const char *>(u8"\u55b5");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(5U, C.getColumn());
EXPECT_EQ(12U, C.GetNumBytesInBuffer());
// U+200B, zero-width space, encoded as three bytes but has no effect on the
// column or line number.
- C << u8"\u200b";
+ C << reinterpret_cast<const char *>(u8"\u200b");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(5U, C.getColumn());
EXPECT_EQ(15U, C.GetNumBytesInBuffer());
@@ -137,7 +137,7 @@
// the remaining two bytes are written, at which point we can check the
// display width. In this case the display width is 1, so we end at column 4,
// with 6 bytes written into total, 2 of which are in the buffer.
- C << u8"123\u2468";
+ C << reinterpret_cast<const char *>(u8"123\u2468");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(4U, C.getColumn());
EXPECT_EQ(2U, C.GetNumBytesInBuffer());
@@ -145,7 +145,7 @@
EXPECT_EQ(6U, A.size());
// Same as above, but with a CJK character which displays as two columns.
- C << u8"123\u55b5";
+ C << reinterpret_cast<const char *>(u8"123\u55b5");
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(9U, C.getColumn());
EXPECT_EQ(2U, C.GetNumBytesInBuffer());
@@ -161,7 +161,7 @@
// The stream has a one-byte buffer, so it gets flushed multiple times while
// printing a single Unicode character.
- C << u8"\u2468";
+ C << "\xe2\x91\xa8";
EXPECT_EQ(0U, C.getLine());
EXPECT_EQ(1U, C.getColumn());
EXPECT_EQ(0U, C.GetNumBytesInBuffer());
Index: llvm/include/llvm/Support/raw_ostream.h
===================================================================
--- llvm/include/llvm/Support/raw_ostream.h
+++ llvm/include/llvm/Support/raw_ostream.h
@@ -224,6 +224,20 @@
return *this;
}
+#if defined(__cpp_char8_t)
+ // When using `char8_t *`, integers or pointers are written to the ostream
+ // instead of the UTF-8 text one might expect. This might lead to unexpected
+ // behavior, especially as `u8""` literals are of type `char8_t*` instead of
+ // type `char*` from C++20 onwards. Thus we disallow using them with
+ // raw_ostreams.
+ // If you have u8"" literals to stream, you can rewrite them as ordinary
+ // literals with escape sequences
+ // e.g. replace `u8"\u00a0"` by `"\xc2\xa0"`
+ // or use `reinterpret_cast`:
+ // e.g. replace `u8"\u00a0"` by `reinterpret_cast<const char *>(u8"\u00a0")`
+ raw_ostream &operator<<(const char8_t *Str) = delete;
+#endif
+
raw_ostream &operator<<(const char *Str) {
// Inline fast path, particularly for constant strings where a sufficiently
// smart compiler will simplify strlen.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D141392.488522.patch
Type: text/x-patch
Size: 3791 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230112/e5ed8d8f/attachment-0001.bin>
More information about the llvm-commits
mailing list