[libcxx-commits] [libcxx] [libc++] Fix UTF-8 decoding in codecvts (PR #68442)

via libcxx-commits libcxx-commits at lists.llvm.org
Fri Nov 24 12:33:36 PST 2023


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->


:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 20f634f275b431ff256ba45cbcbb6dc5bd945fb3 0b6b3bea4a1b0d6640ace345fa444d0874e5806b -- libcxx/test/std/localization/codecvt_unicode.pass.cpp libcxx/src/locale.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 15cc2d7df6..ee17cbb9c3 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -1979,26 +1979,25 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
         }
         else if (c1 < 0xF0)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xE0:
-                if ((c2 & 0xE0) != 0xA0)
-                    return codecvt_base::error;
-                 break;
-            case 0xED:
-                if ((c2 & 0xE0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xE0:
+            if ((c2 & 0xE0) != 0xA0)
+              return codecvt_base::error;
+            break;
+          case 0xED:
+            if ((c2 & 0xE0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
                 return codecvt_base::error;
@@ -2012,34 +2011,33 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
         }
         else if (c1 < 0xF5)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xF0:
-                if (!(0x90 <= c2 && c2 <= 0xBF))
-                    return codecvt_base::error;
-                 break;
-            case 0xF4:
-                if ((c2 & 0xF0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xF0:
+            if (!(0x90 <= c2 && c2 <= 0xBF))
+              return codecvt_base::error;
+            break;
+          case 0xF4:
+            if ((c2 & 0xF0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
-                 return codecvt_base::error;
-            if (frm_end-frm_nxt < 4)
-                 return codecvt_base::partial;
+              return codecvt_base::error;
+            if (frm_end - frm_nxt < 4)
+              return codecvt_base::partial;
             uint8_t c4 = frm_nxt[3];
             if ((c4 & 0xC0) != 0x80)
-                 return codecvt_base::error;
+              return codecvt_base::error;
             if (to_end-to_nxt < 2)
                 return codecvt_base::partial;
             if ((((c1 & 7UL) << 18) +
@@ -2108,26 +2106,25 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
         }
         else if (c1 < 0xF0)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xE0:
-                if ((c2 & 0xE0) != 0xA0)
-                    return codecvt_base::error;
-                 break;
-            case 0xED:
-                if ((c2 & 0xE0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xE0:
+            if ((c2 & 0xE0) != 0xA0)
+              return codecvt_base::error;
+            break;
+          case 0xED:
+            if ((c2 & 0xE0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
                 return codecvt_base::error;
@@ -2141,34 +2138,33 @@ utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nx
         }
         else if (c1 < 0xF5)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xF0:
-                if (!(0x90 <= c2 && c2 <= 0xBF))
-                    return codecvt_base::error;
-                 break;
-            case 0xF4:
-                if ((c2 & 0xF0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xF0:
+            if (!(0x90 <= c2 && c2 <= 0xBF))
+              return codecvt_base::error;
+            break;
+          case 0xF4:
+            if ((c2 & 0xF0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
-                 return codecvt_base::error;
-            if (frm_end-frm_nxt < 4)
-                 return codecvt_base::partial;
+              return codecvt_base::error;
+            if (frm_end - frm_nxt < 4)
+              return codecvt_base::partial;
             uint8_t c4 = frm_nxt[3];
             if ((c4 & 0xC0) != 0x80)
-                 return codecvt_base::error;
+              return codecvt_base::error;
             if (to_end-to_nxt < 2)
                 return codecvt_base::partial;
             if ((((c1 & 7UL) << 18) +
@@ -2394,26 +2390,25 @@ utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
         }
         else if (c1 < 0xF0)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xE0:
-                if ((c2 & 0xE0) != 0xA0)
-                    return codecvt_base::error;
-                 break;
-            case 0xED:
-                if ((c2 & 0xE0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xE0:
+            if ((c2 & 0xE0) != 0xA0)
+              return codecvt_base::error;
+            break;
+          case 0xED:
+            if ((c2 & 0xE0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
                 return codecvt_base::error;
@@ -2427,34 +2422,33 @@ utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
         }
         else if (c1 < 0xF5)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xF0:
-                if (!(0x90 <= c2 && c2 <= 0xBF))
-                    return codecvt_base::error;
-                 break;
-            case 0xF4:
-                if ((c2 & 0xF0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xF0:
+            if (!(0x90 <= c2 && c2 <= 0xBF))
+              return codecvt_base::error;
+            break;
+          case 0xF4:
+            if ((c2 & 0xF0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
-                 return codecvt_base::error;
-            if (frm_end-frm_nxt < 4)
-                 return codecvt_base::partial;
+              return codecvt_base::error;
+            if (frm_end - frm_nxt < 4)
+              return codecvt_base::partial;
             uint8_t c4 = frm_nxt[3];
             if ((c4 & 0xC0) != 0x80)
-                 return codecvt_base::error;
+              return codecvt_base::error;
             uint32_t t = static_cast<uint32_t>(((c1 & 0x07) << 18)
                                              | ((c2 & 0x3F) << 12)
                                              | ((c3 & 0x3F) << 6)
@@ -2660,26 +2654,25 @@ utf8_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt
         }
         else if (c1 < 0xF0)
         {
-            if (frm_end-frm_nxt < 2)
-                return codecvt_base::partial;
-            uint8_t c2 = frm_nxt[1];
-            switch (c1)
-            {
-            case 0xE0:
-                if ((c2 & 0xE0) != 0xA0)
-                    return codecvt_base::error;
-                 break;
-            case 0xED:
-                if ((c2 & 0xE0) != 0x80)
-                    return codecvt_base::error;
-                 break;
-            default:
-                if ((c2 & 0xC0) != 0x80)
-                    return codecvt_base::error;
-                 break;
+          if (frm_end - frm_nxt < 2)
+            return codecvt_base::partial;
+          uint8_t c2 = frm_nxt[1];
+          switch (c1) {
+          case 0xE0:
+            if ((c2 & 0xE0) != 0xA0)
+              return codecvt_base::error;
+            break;
+          case 0xED:
+            if ((c2 & 0xE0) != 0x80)
+              return codecvt_base::error;
+            break;
+          default:
+            if ((c2 & 0xC0) != 0x80)
+              return codecvt_base::error;
+            break;
             }
-            if (frm_end-frm_nxt < 3)
-                 return codecvt_base::partial;
+            if (frm_end - frm_nxt < 3)
+              return codecvt_base::partial;
             uint8_t c3 = frm_nxt[2];
             if ((c3 & 0xC0) != 0x80)
                 return codecvt_base::error;

``````````

</details>


https://github.com/llvm/llvm-project/pull/68442


More information about the libcxx-commits mailing list