[PATCH] non-Unicode response file support on Windows

Yunzhong Gao Yunzhong_Gao at playstation.sony.com
Mon Jan 26 19:03:19 PST 2015


> Maybe one option would be to check whether the file is valid UTF-8 and fall back to the current code page if that check fails.


Cleaned up and rebased the patch.
I take it that what Rafael (Espindola) suggested earlier was to run isLegalUTF8String() first, and to assume the system code page only when the contents are not valid UTF-8. Is that the intended behavior?


http://reviews.llvm.org/D7133

Files:
  llvm/lib/Support/CommandLine.cpp

Index: llvm/lib/Support/CommandLine.cpp
===================================================================
--- llvm/lib/Support/CommandLine.cpp
+++ llvm/lib/Support/CommandLine.cpp
@@ -36,6 +36,13 @@
 #include <cstdlib>
 #include <map>
 #include <system_error>
+
+#ifdef LLVM_ON_WIN32
+#include "Windows/WindowsSupport.h"
+#undef max
+#undef min
+#endif
+
 using namespace llvm;
 using namespace cl;
 
@@ -662,6 +669,49 @@
           S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
 }
 
+#ifdef LLVM_ON_WIN32
+// Convert a string encoded in the system's ANSI code page (CP_ACP) to UTF-8.
+// Returns false if SrcBytes is not valid in that code page or a Win32
+// conversion call fails; Out is left empty on failure. Out must be empty.
+static bool convertANSIToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
+  assert(Out.empty());
+
+  if (SrcBytes.empty())
+    return true;
+
+  // Step 1: ANSI -> UTF-16. Probe for the required length, then convert.
+  // MB_ERR_INVALID_CHARS makes invalid input fail instead of being replaced.
+  // NOTE(review): SrcBytes.size() narrows from size_t to int at these calls.
+  int Len = ::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                                  SrcBytes.size(), nullptr, 0);
+  if (Len == 0)
+    return false;
+
+  SmallVector<wchar_t, 128> UTF16;
+  UTF16.reserve(Len);
+  UTF16.set_size(Len);
+  Len = ::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                              SrcBytes.size(), UTF16.data(), UTF16.size());
+  if (Len == 0)
+    return false;
+
+  // Step 2: UTF-16 -> UTF-8, written straight into Out (no staging buffer).
+  Len = ::WideCharToMultiByte(CP_UTF8, 0, UTF16.data(), UTF16.size(), nullptr,
+                              0, nullptr, nullptr);
+  if (Len == 0)
+    return false;
+
+  Out.resize(Len);
+  Len = ::WideCharToMultiByte(CP_UTF8, 0, UTF16.data(), UTF16.size(), &Out[0],
+                              Out.size(), nullptr, nullptr);
+  if (Len == 0) {
+    Out.clear(); // Preserve the "empty on failure" contract.
+    return false;
+  }
+  return true;
+}
+#endif // LLVM_ON_WIN32
+
 static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
                                TokenizerCallback Tokenizer,
                                SmallVectorImpl<const char *> &NewArgv,
@@ -686,6 +736,19 @@
   // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
   else if (hasUTF8ByteOrderMark(BufRef))
     Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
+#ifdef LLVM_ON_WIN32
+  // Otherwise, this might be a hand-written text file encoded in the system's
+  // default code page: if it is not valid UTF-8, convert it from CP_ACP.
+  else {
+    const UTF8 *Begin = reinterpret_cast<const UTF8 *>(BufRef.begin());
+    if (!isLegalUTF8String(&Begin,
+           reinterpret_cast<const UTF8 *>(BufRef.end()))) {
+      if (!convertANSIToUTF8String(BufRef, UTF8Buf))
+        return false;
+      Str = StringRef(UTF8Buf);
+    }
+  }
+#endif
 
   // Tokenize the contents into NewArgv.
   Tokenizer(Str, Saver, NewArgv, MarkEOLs);

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7133.18794.patch
Type: text/x-patch
Size: 2687 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/47dd61e9/attachment.bin>


More information about the llvm-commits mailing list