[llvm] r227122 - Add a UTF8 to UTF16 conversion wrapper for use in the pdb dumper

Reid Kleckner reid at kleckner.net
Mon Jan 26 11:51:01 PST 2015


Author: rnk
Date: Mon Jan 26 13:51:00 2015
New Revision: 227122

URL: http://llvm.org/viewvc/llvm-project?rev=227122&view=rev
Log:
Add a UTF8 to UTF16 conversion wrapper for use in the pdb dumper

This can also be used instead of the WindowsSupport.h ConvertUTF8ToUTF16
helpers, but that will require massaging some character types. The
Windows support routines want wchar_t output, but wchar_t is often 32
bits on non-Windows OSs.

Modified:
    llvm/trunk/include/llvm/Support/ConvertUTF.h
    llvm/trunk/lib/Support/ConvertUTFWrapper.cpp
    llvm/trunk/unittests/Support/ConvertUTFTest.cpp

Modified: llvm/trunk/include/llvm/Support/ConvertUTF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ConvertUTF.h?rev=227122&r1=227121&r2=227122&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/ConvertUTF.h (original)
+++ llvm/trunk/include/llvm/Support/ConvertUTF.h Mon Jan 26 13:51:00 2015
@@ -251,6 +251,14 @@ bool hasUTF16ByteOrderMark(ArrayRef<char
  */
 bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
 
+/**
+ * Converts a UTF-8 string into a UTF-16 string with native endianness.
+ *
+ * \returns true on success
+ */
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+                              SmallVectorImpl<UTF16> &DstUTF16);
+
 } /* end namespace llvm */
 
 #endif

Modified: llvm/trunk/lib/Support/ConvertUTFWrapper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/ConvertUTFWrapper.cpp?rev=227122&r1=227121&r2=227122&view=diff
==============================================================================
--- llvm/trunk/lib/Support/ConvertUTFWrapper.cpp (original)
+++ llvm/trunk/lib/Support/ConvertUTFWrapper.cpp Mon Jan 26 13:51:00 2015
@@ -127,5 +127,36 @@ bool convertUTF16ToUTF8String(ArrayRef<c
   return true;
 }
 
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+                              SmallVectorImpl<UTF16> &DstUTF16) {
+  assert(DstUTF16.empty());
+
+  // Avoid OOB by returning early on empty input.
+  if (SrcUTF8.empty())
+    return true;
+
+  const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
+  const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());
+
+  // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding
+  // as UTF-16 should always require the same number of code units as the
+  // UTF-8 encoding, or fewer.
+  DstUTF16.resize(SrcUTF8.size());
+  UTF16 *Dst = &DstUTF16[0];
+  UTF16 *DstEnd = Dst + DstUTF16.size();
+
+  ConversionResult CR =
+      ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+  assert(CR != targetExhausted);
+
+  if (CR != conversionOK) {
+    DstUTF16.clear();
+    return false;
+  }
+
+  DstUTF16.resize(Dst - &DstUTF16[0]);
+  return true;
+}
+
 } // end namespace llvm
 

Modified: llvm/trunk/unittests/Support/ConvertUTFTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/ConvertUTFTest.cpp?rev=227122&r1=227121&r2=227122&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/ConvertUTFTest.cpp (original)
+++ llvm/trunk/unittests/Support/ConvertUTFTest.cpp Mon Jan 26 13:51:00 2015
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Format.h"
 #include "gtest/gtest.h"
 #include <string>
 #include <utility>
@@ -37,6 +38,19 @@ TEST(ConvertUTFTest, ConvertUTF16BigEndi
   EXPECT_EQ(Expected, Result);
 }
 
+TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
+  // Src is the look of disapproval.
+  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
+  StringRef Ref(Src, sizeof(Src) - 1);
+  SmallVector<UTF16, 5> Result;
+  bool Success = convertUTF8ToUTF16String(Ref, Result);
+  EXPECT_TRUE(Success);
+  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
+  ASSERT_EQ(3, Result.size());
+  for (int I = 0, E = 3; I != E; ++I)
+    EXPECT_EQ(Expected[I], Result[I]);
+}
+
 TEST(ConvertUTFTest, OddLengthInput) {
   std::string Result;
   bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);





More information about the llvm-commits mailing list