[libcxx-commits] [libcxx] [libc++] Fix UTF-8 decoding in codecvts (PR #68442)
via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Nov 24 12:33:36 PST 2023
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 20f634f275b431ff256ba45cbcbb6dc5bd945fb3 0b6b3bea4a1b0d6640ace345fa444d0874e5806b -- libcxx/test/std/localization/codecvt_unicode.pass.cpp libcxx/src/locale.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 15cc2d7df6..ee17cbb9c3 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -1979,26 +1979,25 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
}
else if (c1 < 0xF0)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return codecvt_base::error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
return codecvt_base::error;
@@ -2012,34 +2011,33 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
}
else if (c1 < 0xF5)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return codecvt_base::error;
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
- return codecvt_base::error;
- if (frm_end-frm_nxt < 4)
- return codecvt_base::partial;
+ return codecvt_base::error;
+ if (frm_end - frm_nxt < 4)
+ return codecvt_base::partial;
uint8_t c4 = frm_nxt[3];
if ((c4 & 0xC0) != 0x80)
- return codecvt_base::error;
+ return codecvt_base::error;
if (to_end-to_nxt < 2)
return codecvt_base::partial;
if ((((c1 & 7UL) << 18) +
@@ -2108,26 +2106,25 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
}
else if (c1 < 0xF0)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return codecvt_base::error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
return codecvt_base::error;
@@ -2141,34 +2138,33 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
}
else if (c1 < 0xF5)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return codecvt_base::error;
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
- return codecvt_base::error;
- if (frm_end-frm_nxt < 4)
- return codecvt_base::partial;
+ return codecvt_base::error;
+ if (frm_end - frm_nxt < 4)
+ return codecvt_base::partial;
uint8_t c4 = frm_nxt[3];
if ((c4 & 0xC0) != 0x80)
- return codecvt_base::error;
+ return codecvt_base::error;
if (to_end-to_nxt < 2)
return codecvt_base::partial;
if ((((c1 & 7UL) << 18) +
@@ -2394,26 +2390,25 @@ utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
}
else if (c1 < 0xF0)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return codecvt_base::error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
return codecvt_base::error;
@@ -2427,34 +2422,33 @@ utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
}
else if (c1 < 0xF5)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return codecvt_base::error;
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
- return codecvt_base::error;
- if (frm_end-frm_nxt < 4)
- return codecvt_base::partial;
+ return codecvt_base::error;
+ if (frm_end - frm_nxt < 4)
+ return codecvt_base::partial;
uint8_t c4 = frm_nxt[3];
if ((c4 & 0xC0) != 0x80)
- return codecvt_base::error;
+ return codecvt_base::error;
uint32_t t = static_cast<uint32_t>(((c1 & 0x07) << 18)
| ((c2 & 0x3F) << 12)
| ((c3 & 0x3F) << 6)
@@ -2660,26 +2654,25 @@ utf8_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
}
else if (c1 < 0xF0)
{
- if (frm_end-frm_nxt < 2)
- return codecvt_base::partial;
- uint8_t c2 = frm_nxt[1];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return codecvt_base::error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return codecvt_base::error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return codecvt_base::error;
- break;
+ if (frm_end - frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ switch (c1) {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
}
- if (frm_end-frm_nxt < 3)
- return codecvt_base::partial;
+ if (frm_end - frm_nxt < 3)
+ return codecvt_base::partial;
uint8_t c3 = frm_nxt[2];
if ((c3 & 0xC0) != 0x80)
return codecvt_base::error;
``````````
</details>
https://github.com/llvm/llvm-project/pull/68442
More information about the libcxx-commits mailing list