<div dir="ltr">On Fri, Jul 12, 2013 at 5:28 PM, Reid Kleckner <span dir="ltr">&lt;<a href="mailto:rnk@google.com" target="_blank">rnk@google.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
MSBuild writes response files as UTF-16 little endian with a byte order<br>
mark (0xFEFF). With this change, clang will be able to read them, although we<br>
still can't parse any of their flags. A BOM with non-native endianness reads<br>
as 0xFFFE and is detected, but byte-swapping such files is still a TODO.<br>
<br>
<a href="http://llvm-reviews.chandlerc.com/D1137" target="_blank">http://llvm-reviews.chandlerc.com/D1137</a><br>
<br>
Files:<br>
test/Driver/at_file.c<br>
test/Driver/at_file.c.args.utf16le<br>
tools/driver/driver.cpp<br>
<br>
Index: test/Driver/at_file.c<br>
===================================================================<br>
--- test/Driver/at_file.c<br>
+++ test/Driver/at_file.c<br>
@@ -1,5 +1,7 @@<br>
// RUN: %clang -E %s @%s.args -o %t.log<br>
// RUN: FileCheck --input-file=%t.log %s<br>
+// RUN: %clang -E %s @%s.args.utf16le -o %t.log<br>
+// RUN: FileCheck --input-file=%t.log %s<br>
<br>
// CHECK: bar1<br>
// CHECK-NEXT: bar2 zed2<br>
Index: tools/driver/driver.cpp<br>
===================================================================<br>
--- tools/driver/driver.cpp<br>
+++ tools/driver/driver.cpp<br>
@@ -25,9 +25,11 @@<br>
#include "llvm/ADT/OwningPtr.h"<br>
#include "llvm/ADT/SmallString.h"<br>
#include "llvm/ADT/SmallVector.h"<br>
+#include "llvm/ADT/STLExtras.h"<br>
#include "llvm/Option/ArgList.h"<br>
#include "llvm/Option/OptTable.h"<br>
#include "llvm/Option/Option.h"<br>
+#include "llvm/Support/ConvertUTF.h"<br>
#include "llvm/Support/ErrorHandling.h"<br>
#include "llvm/Support/FileSystem.h"<br>
#include "llvm/Support/Host.h"<br>
@@ -199,16 +201,51 @@<br>
}<br>
<br>
const char *Buf = MemBuf->getBufferStart();<br>
+ const char *BufEnd = MemBuf->getBufferEnd() + 1;<br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> char InQuote = ' ';<br>
std::string CurArg;<br>
<br>
+ UTF16 MaybeBOM = 0;<br>
+ memcpy(&MaybeBOM, Buf, 2);<br>
+ const UTF16 *Src = 0, *SrcEnd = 0;<br>
+ UTF8 UTF8Buf[4096];<br>
+<br>
+ if (MaybeBOM == 0xFFFE) {<br>
+ // Byte-swapped endianness UTF16 BOM. We can't issue a diagnostic, so<br>
+ // pretend we didn't realize this was a response file.<br>
+ ArgVector.push_back(SaveStringInSet(SavedStrings, Arg));<br>
+ return;<br>
+ } else if (MaybeBOM == 0xFEFF) {<br>
+ // Native endianness UTF16 BOM. Convert a chunk at a time as needed.<br>
+ Src = reinterpret_cast<const UTF16 *>(Buf) + 1;<br>
+ SrcEnd = reinterpret_cast<const UTF16 *>(BufEnd);<br></blockquote><div><br></div><div>Is BufEnd guaranteed to be aligned on a UTF16* boundary here?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ Buf = 0;<br>
+ BufEnd = 0;<br>
+ }<br>
+<br>
for (const char *P = Buf; ; ++P) {<br>
+ if (P == BufEnd) {<br>
+ assert(Src);<br>
+ UTF8 *Dst = &UTF8Buf[0];<br>
+ ConversionResult CR = ConvertUTF16toUTF8(<br>
+ &Src, SrcEnd, &Dst, llvm::array_endof(UTF8Buf), strictConversion);<br>
+ if (CR == sourceExhausted)<br>
+ break;<br>
+ if (CR != conversionOK && CR != targetExhausted)<br>
+ return; // FIXME: Fail more loudly.<br>
+ // Dst was updated to be one past the last translated byte.<br>
+ P = reinterpret_cast<char *>(UTF8Buf);<br>
+ BufEnd = reinterpret_cast<char *>(Dst);<br>
+ }<br>
+<br>
if (*P == '\0' || (isWhitespace(*P) && InQuote == ' ')) {<br>
if (!CurArg.empty()) {<br>
<br>
if (CurArg[0] != '@') {<br>
ArgVector.push_back(SaveStringInSet(SavedStrings, CurArg));<br>
} else {<br>
+ // FIXME: A response file can refer to itself and cause infinite<br>
+ // recursion.<br>
ExpandArgsFromBuf(CurArg.c_str(), ArgVector, SavedStrings);<br>
}<br>
<br>_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
<br></blockquote></div><br></div></div>