[llvm] r334375 - Attempt 3: Resubmit "[Support] Expose flattenWindowsCommandLine."

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 10 13:57:14 PDT 2018


Author: zturner
Date: Sun Jun 10 13:57:14 2018
New Revision: 334375

URL: http://llvm.org/viewvc/llvm-project?rev=334375&view=rev
Log:
Attempt 3: Resubmit "[Support] Expose flattenWindowsCommandLine."

I took some liberties and quoted fewer characters than before,
based on an article from MSDN which says that only certain characters
cause an arg to require quoting.  This seems to be incorrect, though,
and worse it seems to be a difference in Windows version.  The bot
that fails is Windows 7, and I can't reproduce the failure on Win
10.  But it's definitely related to quoting and special characters,
because both tests that fail have a * in the argument, which is one
of the special characters that would cause an argument to be quoted
before but not any longer after the new patch.

Since I don't have Win 7, all I can do is just guess that I need to
restore the old quoting rules.  So this patch does that in hopes that
it fixes the problem on Windows 7.

Modified:
    llvm/trunk/include/llvm/Support/Program.h
    llvm/trunk/lib/Support/Program.cpp
    llvm/trunk/lib/Support/Unix/Program.inc
    llvm/trunk/lib/Support/Windows/Path.inc
    llvm/trunk/lib/Support/Windows/Program.inc

Modified: llvm/trunk/include/llvm/Support/Program.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Program.h?rev=334375&r1=334374&r2=334375&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/Program.h (original)
+++ llvm/trunk/include/llvm/Support/Program.h Sun Jun 10 13:57:14 2018
@@ -133,6 +133,11 @@ namespace sys {
   /// Return true if the given arguments fit within system-specific
   /// argument length limits.
   bool commandLineFitsWithinSystemLimits(StringRef Program,
+                                         ArrayRef<StringRef> Args);
+
+  /// Return true if the given arguments fit within system-specific
+  /// argument length limits.
+  bool commandLineFitsWithinSystemLimits(StringRef Program,
                                          ArrayRef<const char *> Args);
 
   /// File encoding options when writing contents that a non-UTF8 tool will
@@ -189,6 +194,14 @@ namespace sys {
       ///< string is non-empty upon return an error occurred while invoking the
       ///< program.
       );
+
+#if defined(_WIN32)
+  /// Given a list of command line arguments, quote and escape them as necessary
+  /// to build a single flat command line appropriate for calling CreateProcess
+  /// on
+  /// Windows.
+  std::string flattenWindowsCommandLine(ArrayRef<StringRef> Args);
+#endif
   }
 }
 

Modified: llvm/trunk/lib/Support/Program.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Program.cpp?rev=334375&r1=334374&r2=334375&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Program.cpp (original)
+++ llvm/trunk/lib/Support/Program.cpp Sun Jun 10 13:57:14 2018
@@ -63,6 +63,15 @@ ProcessInfo sys::ExecuteNoWait(StringRef
   return PI;
 }
 
+bool sys::commandLineFitsWithinSystemLimits(StringRef Program,
+                                            ArrayRef<const char *> Args) {
+  SmallVector<StringRef, 8> StringRefArgs;
+  StringRefArgs.reserve(Args.size());
+  for (const char *A : Args)
+    StringRefArgs.emplace_back(A);
+  return commandLineFitsWithinSystemLimits(Program, StringRefArgs);
+}
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Program.inc"

Modified: llvm/trunk/lib/Support/Unix/Program.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Unix/Program.inc?rev=334375&r1=334374&r2=334375&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Unix/Program.inc (original)
+++ llvm/trunk/lib/Support/Unix/Program.inc Sun Jun 10 13:57:14 2018
@@ -434,7 +434,7 @@ llvm::sys::writeFileWithEncoding(StringR
 }
 
 bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program,
-                                                  ArrayRef<const char *> Args) {
+                                                  ArrayRef<StringRef> Args) {
   static long ArgMax = sysconf(_SC_ARG_MAX);
   // POSIX requires that _POSIX_ARG_MAX is 4096, which is the lowest possible
   // value for ARG_MAX on a POSIX compliant system.
@@ -456,18 +456,16 @@ bool llvm::sys::commandLineFitsWithinSys
   long HalfArgMax = EffectiveArgMax / 2;
 
   size_t ArgLength = Program.size() + 1;
-  for (const char* Arg : Args) {
-    size_t length = strlen(Arg);
-
+  for (StringRef Arg : Args) {
     // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which
     // does not have a constant unlike what the man pages would have you
     // believe. Since this limit is pretty high, perform the check
     // unconditionally rather than trying to be aggressive and limiting it to
     // Linux only.
-    if (length >= (32 * 4096))
+    if (Arg.size() >= (32 * 4096))
       return false;
 
-    ArgLength += length + 1;
+    ArgLength += Arg.size() + 1;
     if (ArgLength > size_t(HalfArgMax)) {
       return false;
     }

Modified: llvm/trunk/lib/Support/Windows/Path.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Path.inc?rev=334375&r1=334374&r2=334375&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Path.inc (original)
+++ llvm/trunk/lib/Support/Windows/Path.inc Sun Jun 10 13:57:14 2018
@@ -1197,7 +1197,7 @@ Expected<file_t> openNativeFileForRead(c
   if (Result && RealPath)
     realPathFromHandle(*Result, *RealPath);
 
-  return std::move(Result);
+  return Result;
 }
 
 void closeFile(file_t &F) {

Modified: llvm/trunk/lib/Support/Windows/Program.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Program.inc?rev=334375&r1=334374&r2=334375&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Program.inc (original)
+++ llvm/trunk/lib/Support/Windows/Program.inc Sun Jun 10 13:57:14 2018
@@ -23,6 +23,7 @@
 #include <fcntl.h>
 #include <io.h>
 #include <malloc.h>
+#include <numeric>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
@@ -31,7 +32,7 @@
 
 namespace llvm {
 
-ProcessInfo::ProcessInfo() : Process(0), Pid(0), ReturnCode(0) {}
+ProcessInfo::ProcessInfo() : Pid(0), Process(0), ReturnCode(0) {}
 
 ErrorOr<std::string> sys::findProgramByName(StringRef Name,
                                             ArrayRef<StringRef> Paths) {
@@ -146,108 +147,13 @@ static HANDLE RedirectIO(Optional<String
   return h;
 }
 
-/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess.
-static bool ArgNeedsQuotes(const char *Str) {
-  return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
-}
-
-/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur
-/// in the C string Start.
-static unsigned int CountPrecedingBackslashes(const char *Start,
-                                              const char *Cur) {
-  unsigned int Count = 0;
-  --Cur;
-  while (Cur >= Start && *Cur == '\\') {
-    ++Count;
-    --Cur;
-  }
-  return Count;
-}
-
-/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash
-/// preceding Cur in the Start string.  Assumes Dst has enough space.
-static char *EscapePrecedingEscapes(char *Dst, const char *Start,
-                                    const char *Cur) {
-  unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur);
-  while (PrecedingEscapes > 0) {
-    *Dst++ = '\\';
-    --PrecedingEscapes;
-  }
-  return Dst;
-}
-
-/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess and returns length of quoted arg with escaped quotes
-static unsigned int ArgLenWithQuotes(const char *Str) {
-  const char *Start = Str;
-  bool Quoted = ArgNeedsQuotes(Str);
-  unsigned int len = Quoted ? 2 : 0;
-
-  while (*Str != '\0') {
-    if (*Str == '\"') {
-      // We need to add a backslash, but ensure that it isn't escaped.
-      unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
-      len += PrecedingEscapes + 1;
-    }
-    // Note that we *don't* need to escape runs of backslashes that don't
-    // precede a double quote!  See MSDN:
-    // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx
-
-    ++len;
-    ++Str;
-  }
-
-  if (Quoted) {
-    // Make sure the closing quote doesn't get escaped by a trailing backslash.
-    unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
-    len += PrecedingEscapes + 1;
-  }
-
-  return len;
-}
-
 }
 
-static std::unique_ptr<char[]> flattenArgs(const char **Args) {
-  // First, determine the length of the command line.
-  unsigned len = 0;
-  for (unsigned i = 0; Args[i]; i++) {
-    len += ArgLenWithQuotes(Args[i]) + 1;
-  }
-
-  // Now build the command line.
-  std::unique_ptr<char[]> command(new char[len+1]);
-  char *p = command.get();
-
-  for (unsigned i = 0; Args[i]; i++) {
-    const char *arg = Args[i];
-    const char *start = arg;
-
-    bool needsQuoting = ArgNeedsQuotes(arg);
-    if (needsQuoting)
-      *p++ = '"';
-
-    while (*arg != '\0') {
-      if (*arg == '\"') {
-        // Escape all preceding escapes (if any), and then escape the quote.
-        p = EscapePrecedingEscapes(p, start, arg);
-        *p++ = '\\';
-      }
-
-      *p++ = *arg++;
-    }
-
-    if (needsQuoting) {
-      // Make sure our quote doesn't get escaped by a trailing backslash.
-      p = EscapePrecedingEscapes(p, start, arg);
-      *p++ = '"';
-    }
-    *p++ = ' ';
-  }
-
-  *p = 0;
-  return command;
+static SmallVector<StringRef, 8> buildArgVector(const char **Args) {
+  SmallVector<StringRef, 8> Result;
+  for (unsigned I = 0; Args[I]; ++I)
+    Result.push_back(StringRef(Args[I]));
+  return Result;
 }
 
 static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args,
@@ -270,7 +176,8 @@ static bool Execute(ProcessInfo &PI, Str
   // Windows wants a command line, not an array of args, to pass to the new
   // process.  We have to concatenate them all, while quoting the args that
   // have embedded spaces (or are empty).
-  std::unique_ptr<char[]> command = flattenArgs(Args);
+  auto ArgVector = buildArgVector(Args);
+  std::string Command = flattenWindowsCommandLine(ArgVector);
 
   // The pointer to the environment block for the new process.
   std::vector<wchar_t> EnvBlock;
@@ -353,7 +260,7 @@ static bool Execute(ProcessInfo &PI, Str
   }
 
   SmallVector<wchar_t, MAX_PATH> CommandUtf16;
-  if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
+  if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) {
     SetLastError(ec.value());
     MakeErrMsg(ErrMsg,
                std::string("Unable to convert command-line to UTF-16"));
@@ -414,7 +321,63 @@ static bool Execute(ProcessInfo &PI, Str
   return true;
 }
 
+static bool argNeedsQuotes(StringRef Arg) {
+  if (Arg.empty())
+    return true;
+  return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|");
+}
+
+static std::string quoteSingleArg(StringRef Arg) {
+  std::string Result;
+  Result.push_back('"');
+
+  while (!Arg.empty()) {
+    size_t FirstNonBackslash = Arg.find_first_not_of('\\');
+    size_t BackslashCount = FirstNonBackslash;
+    if (FirstNonBackslash == StringRef::npos) {
+      // The entire remainder of the argument is backslashes.  Escape all of
+      // them and just early out.
+      BackslashCount = Arg.size();
+      Result.append(BackslashCount * 2, '\\');
+      break;
+    }
+
+    if (Arg[FirstNonBackslash] == '\"') {
+      // This is an embedded quote.  Escape all preceding backslashes, then
+      // add one additional backslash to escape the quote.
+      Result.append(BackslashCount * 2 + 1, '\\');
+      Result.push_back('\"');
+    } else {
+      // This is just a normal character.  Don't escape any of the preceding
+      // backslashes, just append them as they are and then append the
+      // character.
+      Result.append(BackslashCount, '\\');
+      Result.push_back(Arg[FirstNonBackslash]);
+    }
+
+    // Drop all the backslashes, plus the following character.
+    Arg = Arg.drop_front(FirstNonBackslash + 1);
+  }
+
+  Result.push_back('"');
+  return Result;
+}
+
 namespace llvm {
+std::string sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) {
+  std::string Command;
+  for (StringRef Arg : Args) {
+    if (argNeedsQuotes(Arg))
+      Command += quoteSingleArg(Arg);
+    else
+      Command += Arg;
+
+    Command.push_back(' ');
+  }
+
+  return Command;
+}
+
 ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
                       bool WaitUntilChildTerminates, std::string *ErrMsg) {
   assert(PI.Pid && "invalid pid to wait on, process not started?");
@@ -537,19 +500,13 @@ llvm::sys::writeFileWithEncoding(StringR
 }
 
 bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program,
-                                                  ArrayRef<const char *> Args) {
+                                                  ArrayRef<StringRef> Args) {
   // The documented max length of the command line passed to CreateProcess.
   static const size_t MaxCommandStringLength = 32768;
-  // Account for the trailing space for the program path and the
-  // trailing NULL of the last argument.
-  size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
-  for (const char* Arg : Args) {
-    // Account for the trailing space for every arg
-    ArgLength += ArgLenWithQuotes(Arg) + 1;
-    if (ArgLength > MaxCommandStringLength) {
-      return false;
-    }
-  }
-  return true;
+  SmallVector<StringRef, 8> FullArgs;
+  FullArgs.push_back(Program);
+  FullArgs.append(Args.begin(), Args.end());
+  std::string Result = flattenWindowsCommandLine(FullArgs);
+  return (Result.size() + 1) <= MaxCommandStringLength;
 }
 }




More information about the llvm-commits mailing list