[llvm] r226988 - If we see a UTF-8 BOM sequence at the beginning of a response file, we shall remove these bytes before parsing.
Yunzhong Gao
Yunzhong_Gao at playstation.sony.com
Fri Jan 23 20:23:09 PST 2015
Author: ygao
Date: Fri Jan 23 22:23:08 2015
New Revision: 226988
URL: http://llvm.org/viewvc/llvm-project?rev=226988&view=rev
Log:
If we see a UTF-8 BOM sequence at the beginning of a response file, we
remove these bytes before parsing.
Phabricator Revision: http://reviews.llvm.org/D7156
Added:
llvm/trunk/test/Other/Inputs/utf8-bom-response
llvm/trunk/test/Other/Inputs/utf8-response
Modified:
llvm/trunk/lib/Support/CommandLine.cpp
llvm/trunk/test/Other/ResponseFile.ll
Modified: llvm/trunk/lib/Support/CommandLine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=226988&r1=226987&r2=226988&view=diff
==============================================================================
--- llvm/trunk/lib/Support/CommandLine.cpp (original)
+++ llvm/trunk/lib/Support/CommandLine.cpp Fri Jan 23 22:23:08 2015
@@ -655,6 +655,13 @@ void cl::TokenizeWindowsCommandLine(Stri
NewArgv.push_back(nullptr);
}
+// Returns true if S starts with the UTF-8 byte order mark (bytes EF BB BF).
+// Despite the name, the UTF-8 BOM is not affected by the host's endianness.
+static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
+ return (S.size() >= 3 &&
+ S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
+}
+
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv,
@@ -674,6 +681,11 @@ static bool ExpandResponseFile(const cha
return false;
Str = StringRef(UTF8Buf);
}
+ // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove
+ // these bytes before parsing.
+ // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
+ else if (hasUTF8ByteOrderMark(BufRef))
+ Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);
Added: llvm/trunk/test/Other/Inputs/utf8-bom-response
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/Inputs/utf8-bom-response?rev=226988&view=auto
==============================================================================
--- llvm/trunk/test/Other/Inputs/utf8-bom-response (added)
+++ llvm/trunk/test/Other/Inputs/utf8-bom-response Fri Jan 23 22:23:08 2015
@@ -0,0 +1 @@
+-help
Added: llvm/trunk/test/Other/Inputs/utf8-response
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/Inputs/utf8-response?rev=226988&view=auto
==============================================================================
--- llvm/trunk/test/Other/Inputs/utf8-response (added)
+++ llvm/trunk/test/Other/Inputs/utf8-response Fri Jan 23 22:23:08 2015
@@ -0,0 +1 @@
+-help
Modified: llvm/trunk/test/Other/ResponseFile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/ResponseFile.ll?rev=226988&r1=226987&r2=226988&view=diff
==============================================================================
--- llvm/trunk/test/Other/ResponseFile.ll (original)
+++ llvm/trunk/test/Other/ResponseFile.ll Fri Jan 23 22:23:08 2015
@@ -6,6 +6,11 @@
; RUN: llvm-as @%t.list2 -o %t.bc
; RUN: llvm-nm %t.bc 2>&1 | FileCheck %s
+; When the response file begins with a UTF-8 BOM sequence, we shall remove it.
+; Neither command below should return a "Could not open input file" error.
+; RUN: llvm-as @%S/Inputs/utf8-response > /dev/null
+; RUN: llvm-as @%S/Inputs/utf8-bom-response > /dev/null
+
; CHECK: T foobar
define void @foobar() {
More information about the llvm-commits mailing list