[PATCH] non-Unicode response file support on Windows
Yunzhong Gao
Yunzhong_Gao at playstation.sony.com
Mon Jan 26 19:03:19 PST 2015
> Maybe one option would be to check whether the file is UTF-8 and fall back to the current code page if that fails.
Cleaned up and rebased the patch.
I figure that what Rafael (Espindola) meant earlier was probably to run isLegalUTF8String() first, and to assume the system code page only if that check fails. Is that right?
http://reviews.llvm.org/D7133
Files:
llvm/lib/Support/CommandLine.cpp
Index: llvm/lib/Support/CommandLine.cpp
===================================================================
--- llvm/lib/Support/CommandLine.cpp
+++ llvm/lib/Support/CommandLine.cpp
@@ -36,6 +36,13 @@
#include <cstdlib>
#include <map>
#include <system_error>
+
+#ifdef LLVM_ON_WIN32
+#include "Windows/WindowsSupport.h"
+#undef max
+#undef min
+#endif
+
using namespace llvm;
using namespace cl;
@@ -662,6 +669,49 @@
S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
}
+#ifdef LLVM_ON_WIN32
+// Convert a string encoded in the system's active ANSI code page (CP_ACP) to
+// UTF-8.
+//
+// SrcBytes holds the raw input bytes and may be empty. Out must be empty on
+// entry (asserted); on success it receives the UTF-8 text, on failure it is
+// left empty. Returns true on success, including for empty input, and false
+// if the input is too long to describe with an 'int' length or if any of the
+// Win32 conversion calls fails.
+static bool convertANSIToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
+  assert(Out.empty());
+
+  if (SrcBytes.empty())
+    return true;
+
+  // The Win32 conversion routines take buffer lengths as 'int'. Reject any
+  // input whose size does not survive the narrowing instead of silently
+  // converting a truncated (or negative) length.
+  const int SrcLen = static_cast<int>(SrcBytes.size());
+  if (SrcLen < 0 || static_cast<size_t>(SrcLen) != SrcBytes.size())
+    return false;
+
+  // Pass 1: with a zero-sized output buffer the call only reports how many
+  // UTF-16 code units are needed. MB_ERR_INVALID_CHARS makes the call fail on
+  // input that is not valid in the code page.
+  const int UTF16Len =
+      ::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                            SrcLen, nullptr, 0);
+  if (UTF16Len == 0)
+    return false;
+
+  // Pass 2: perform the ANSI -> UTF-16 conversion into a right-sized buffer.
+  SmallVector<wchar_t, 128> UTF16;
+  UTF16.resize(UTF16Len);
+  if (::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                            SrcLen, UTF16.data(), UTF16Len) == 0)
+    return false;
+
+  // Pass 3: query the number of UTF-8 bytes needed for the UTF-16 text.
+  const int UTF8Len = ::WideCharToMultiByte(CP_UTF8, 0, UTF16.data(), UTF16Len,
+                                            nullptr, 0, nullptr, nullptr);
+  if (UTF8Len == 0)
+    return false;
+
+  // Pass 4: convert straight into the caller's string, avoiding a temporary
+  // buffer and an extra copy.
+  Out.resize(UTF8Len);
+  if (::WideCharToMultiByte(CP_UTF8, 0, UTF16.data(), UTF16Len, &Out[0],
+                            UTF8Len, nullptr, nullptr) == 0) {
+    // Keep the "Out is empty on failure" behaviour callers may rely on.
+    Out.clear();
+    return false;
+  }
+
+  return true;
+}
+#endif // LLVM_ON_WIN32
+
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv,
@@ -686,6 +736,19 @@
// Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
else if (hasUTF8ByteOrderMark(BufRef))
Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
+#ifdef LLVM_ON_WIN32
+ // Otherwise, this might be a hand-written text file encoded in the system's
+ // default code page.
+ else {
+ const UTF8 *Begin = reinterpret_cast<const UTF8 *>(BufRef.begin()),
+ if (!isLegalUTF8String(&Begin,
+ reinterpret_cast<const UTF8 *>(BufRef.end()))) {
+ if (!convertANSIToUTF8String(BufRef, UTF8Buf))
+ return false;
+ Str = StringRef(UTF8Buf);
+ }
+ }
+#endif
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7133.18794.patch
Type: text/x-patch
Size: 2687 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/47dd61e9/attachment.bin>
More information about the llvm-commits
mailing list