[llvm] a6beb18 - Revert "Add UTF32 to/from UTF8 conversion functions"

Aaron Ballman via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 22 12:01:00 PDT 2022


Author: Aaron Ballman
Date: 2022-03-22T15:00:40-04:00
New Revision: a6beb18b845ca8548319d08df9eea46c87e1e533

URL: https://github.com/llvm/llvm-project/commit/a6beb18b845ca8548319d08df9eea46c87e1e533
DIFF: https://github.com/llvm/llvm-project/commit/a6beb18b845ca8548319d08df9eea46c87e1e533.diff

LOG: Revert "Add UTF32 to/from UTF8 conversion functions"

This reverts commit c3460689288abc98c91d8d6bffa74be9eb16c74d.

It broke at least one of the builders:
https://lab.llvm.org/buildbot#builders/100/builds/13947

Added: 
    

Modified: 
    llvm/include/llvm/Support/ConvertUTF.h
    llvm/lib/Support/ConvertUTFWrapper.cpp
    llvm/unittests/Support/ConvertUTFTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
index 662f3aca5b543..374cdb907fdc2 100644
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@@ -126,9 +126,6 @@ typedef unsigned char   Boolean; /* 0 or 1 */
 #define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
 #define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
 
-#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF
-#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000
-
 typedef enum {
   conversionOK,           /* conversion successful */
   sourceExhausted,        /* partial character in source, but hit end */
@@ -284,24 +281,6 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
 */
 bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out);
 
-/**
- * Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
- *
- * \param [in] SrcBytes A buffer of what is assumed to be UTF-32 encoded text.
- * \param [out] Out Converted UTF-8 is stored here on success.
- * \returns true on success
- */
-bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
-
-/**
- * Converts a UTF32 string into a UTF8 std::string.
- *
- * \param [in] Src A buffer of UTF-32 encoded text.
- * \param [out] Out Converted UTF-8 is stored here on success.
- * \returns true on success
- */
-bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out);
-
 /**
  * Converts a UTF-8 string into a UTF-16 string with native endianness.
  *

diff  --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
index 955d9e7efedd7..392c4c4890e16 100644
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -141,62 +141,6 @@ bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out)
       Src.size() * sizeof(UTF16)), Out);
 }
 
-bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
-  assert(Out.empty());
-
-  // Avoid OOB by returning early on empty input.
-  if (SrcBytes.empty() &&
-      (SrcBytes.size() % 4) != 0) // Assume multiple of 4 bytes; Unicode's max
-                                  // num of code units in UTF-8.
-    return true;
-
-  const UTF32 *Src = reinterpret_cast<const UTF32 *>(SrcBytes.begin());
-  const UTF32 *SrcEnd = reinterpret_cast<const UTF32 *>(SrcBytes.end());
-
-  assert((uintptr_t)Src % sizeof(UTF32) == 0);
-
-  // Byteswap if necessary.
-  std::vector<UTF32> ByteSwapped;
-  if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) {
-    ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
-    for (unsigned I = 0, E = ByteSwapped.size(); I != E; ++I)
-      ByteSwapped[I] = llvm::ByteSwap_32(ByteSwapped[I]);
-    Src = &ByteSwapped[0];
-    SrcEnd = Src + ByteSwapped.size();
-  }
-
-  // Skip the BOM for conversion.
-  if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_NATIVE)
-    Src++;
-
-  // Just allocate enough space up front.  We'll shrink it later.  Allocate
-  // enough that we can fit a null terminator without reallocating.
-  Out.resize(SrcBytes.size() + 1); //
-  UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
-  UTF8 *DstEnd = Dst + Out.size();
-
-  ConversionResult CR =
-      ConvertUTF32toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
-  assert(CR != targetExhausted);
-
-  if (CR != conversionOK) {
-    Out.clear();
-    return false;
-  }
-
-  Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
-  Out.push_back(0);
-  Out.pop_back();
-  return true;
-}
-
-bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out) {
-  return convertUTF32ToUTF8String(
-      llvm::ArrayRef<char>(reinterpret_cast<const char *>(Src.data()),
-                           Src.size() * sizeof(UTF32)),
-      Out);
-}
-
 bool convertUTF8ToUTF16String(StringRef SrcUTF8,
                               SmallVectorImpl<UTF16> &DstUTF16) {
   assert(DstUTF16.empty());

diff  --git a/llvm/unittests/Support/ConvertUTFTest.cpp b/llvm/unittests/Support/ConvertUTFTest.cpp
index 79136b7029068..7bda6ea28ad69 100644
--- a/llvm/unittests/Support/ConvertUTFTest.cpp
+++ b/llvm/unittests/Support/ConvertUTFTest.cpp
@@ -25,17 +25,6 @@ TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
   EXPECT_EQ(Expected, Result);
 }
 
-TEST(ConvertUTFTest, ConvertUTF32LittleEndianToUTF8String) {
-  // Src is a crystal ball.
-  alignas(UTF32) static const char Src[] = "\x2E\xF5\x01\x00";
-  ArrayRef<char> Ref(Src, sizeof(Src) - 1);
-  std::string Result;
-  bool Success = convertUTF32ToUTF8String(Ref, Result);
-  EXPECT_TRUE(Success);
-  std::string Expected("\xF0\x9F\x94\xAE");
-  EXPECT_EQ(Expected, Result);
-}
-
 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
   // Src is the look of disapproval.
   alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
@@ -47,17 +36,6 @@ TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
   EXPECT_EQ(Expected, Result);
 }
 
-TEST(ConvertUTFTest, ConvertUTF32BigEndianToUTF8String) {
-  // Src is a crystal ball.
-  alignas(UTF32) static const char Src[] = "\x00\x00\xfe\xff\x00\x01\xF5\x2E";
-  ArrayRef<char> Ref(Src, sizeof(Src) - 1);
-  std::string Result;
-  bool Success = convertUTF32ToUTF8String(Ref, Result);
-  EXPECT_TRUE(Success);
-  std::string Expected("\xF0\x9F\x94\xAE");
-  EXPECT_EQ(Expected, Result);
-}
-
 TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
   // Src is the look of disapproval.
   static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";


        


More information about the llvm-commits mailing list