[PATCH] [Driver] Convert response files with UTF-16 BOMs to UTF-8
Reid Kleckner
rnk at google.com
Fri Jul 12 17:28:34 PDT 2013
MSBuild writes response files as UTF-16 little endian with a byte order
mark (0xFEFF). With this change, clang will be able to read them, although we
still can't parse any of their flags. A BOM with non-native endianness reads
as 0xFFFE; such a response file is left unexpanded for now, and byte-swapping
it is a TODO.
http://llvm-reviews.chandlerc.com/D1137
Files:
test/Driver/at_file.c
test/Driver/at_file.c.args.utf16le
tools/driver/driver.cpp
Index: test/Driver/at_file.c
===================================================================
--- test/Driver/at_file.c
+++ test/Driver/at_file.c
@@ -1,5 +1,7 @@
// RUN: %clang -E %s @%s.args -o %t.log
// RUN: FileCheck --input-file=%t.log %s
+// RUN: %clang -E %s @%s.args.utf16le -o %t.log
+// RUN: FileCheck --input-file=%t.log %s
// CHECK: bar1
// CHECK-NEXT: bar2 zed2
Index: tools/driver/driver.cpp
===================================================================
--- tools/driver/driver.cpp
+++ tools/driver/driver.cpp
@@ -25,9 +25,11 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -199,16 +201,51 @@
}
const char *Buf = MemBuf->getBufferStart();
+ const char *BufEnd = MemBuf->getBufferEnd() + 1;
char InQuote = ' ';
std::string CurArg;
+ UTF16 MaybeBOM = 0;
+ memcpy(&MaybeBOM, Buf, 2);
+ const UTF16 *Src = 0, *SrcEnd = 0;
+ UTF8 UTF8Buf[4096];
+
+ if (MaybeBOM == 0xFFFE) {
+ // Byte-swapped endianness UTF16 BOM. We can't issue a diagnostic, so
+ // pretend we didn't realize this was a response file.
+ ArgVector.push_back(SaveStringInSet(SavedStrings, Arg));
+ return;
+ } else if (MaybeBOM == 0xFEFF) {
+ // Native endianness UTF16 BOM. Convert a chunk at a time as needed.
+ Src = reinterpret_cast<const UTF16 *>(Buf) + 1;
+ SrcEnd = reinterpret_cast<const UTF16 *>(BufEnd);
+ Buf = 0;
+ BufEnd = 0;
+ }
+
for (const char *P = Buf; ; ++P) {
+ if (P == BufEnd) {
+ assert(Src);
+ UTF8 *Dst = &UTF8Buf[0];
+ ConversionResult CR = ConvertUTF16toUTF8(
+ &Src, SrcEnd, &Dst, llvm::array_endof(UTF8Buf), strictConversion);
+ if (CR == sourceExhausted)
+ break;
+ if (CR != conversionOK && CR != targetExhausted)
+ return; // FIXME: Fail more loudly.
+ // Dst was updated to be one past the last translated byte.
+ P = reinterpret_cast<char *>(UTF8Buf);
+ BufEnd = reinterpret_cast<char *>(Dst);
+ }
+
if (*P == '\0' || (isWhitespace(*P) && InQuote == ' ')) {
if (!CurArg.empty()) {
if (CurArg[0] != '@') {
ArgVector.push_back(SaveStringInSet(SavedStrings, CurArg));
} else {
+ // FIXME: A response file can refer to itself and cause infinite
+ // recursion.
ExpandArgsFromBuf(CurArg.c_str(), ArgVector, SavedStrings);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1137.1.patch
Type: text/x-patch
Size: 2697 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130712/698c4c29/attachment.bin>
More information about the cfe-commits
mailing list