[llvm] r334353 - [Support] Expose flattenWindowsCommandLine.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 9 15:44:44 PDT 2018


Author: zturner
Date: Sat Jun  9 15:44:44 2018
New Revision: 334353

URL: http://llvm.org/viewvc/llvm-project?rev=334353&view=rev
Log:
[Support] Expose flattenWindowsCommandLine.

This function was internal to Program.inc, but I've needed this
on several occasions when I've had to use CreateProcess without
llvm's sys::Execute functions.  In doing so, I noticed that the
function was written using unsafe C-string access and was pretty
hard to understand / make sense of, so I've also re-written the
functions to use more modern LLVM constructs.

Modified:
    llvm/trunk/include/llvm/Support/Program.h
    llvm/trunk/lib/Support/Program.cpp
    llvm/trunk/lib/Support/Windows/Program.inc

Modified: llvm/trunk/include/llvm/Support/Program.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Program.h?rev=334353&r1=334352&r2=334353&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/Program.h (original)
+++ llvm/trunk/include/llvm/Support/Program.h Sat Jun  9 15:44:44 2018
@@ -133,6 +133,11 @@ namespace sys {
   /// Return true if the given arguments fit within system-specific
   /// argument length limits.
   bool commandLineFitsWithinSystemLimits(StringRef Program,
+                                         ArrayRef<StringRef> Args);
+
+  /// Return true if the given arguments fit within system-specific
+  /// argument length limits.
+  bool commandLineFitsWithinSystemLimits(StringRef Program,
                                          ArrayRef<const char *> Args);
 
   /// File encoding options when writing contents that a non-UTF8 tool will
@@ -189,6 +194,14 @@ namespace sys {
       ///< string is non-empty upon return an error occurred while invoking the
       ///< program.
       );
+
+#if defined(_WIN32)
+  /// Given a list of command line arguments, quote and escape them as necessary
+  /// to build a single flat command line appropriate for calling CreateProcess
+  /// on
+  /// Windows.
+  std::string flattenWindowsCommandLine(ArrayRef<StringRef> Args);
+#endif
   }
 }
 

Modified: llvm/trunk/lib/Support/Program.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Program.cpp?rev=334353&r1=334352&r2=334353&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Program.cpp (original)
+++ llvm/trunk/lib/Support/Program.cpp Sat Jun  9 15:44:44 2018
@@ -63,6 +63,15 @@ ProcessInfo sys::ExecuteNoWait(StringRef
   return PI;
 }
 
+bool sys::commandLineFitsWithinSystemLimits(StringRef Program,
+                                            ArrayRef<const char *> Args) {
+  SmallVector<StringRef, 8> StringRefArgs;
+  StringRefArgs.reserve(Args.size());
+  for (const char *A : Args)
+    StringRefArgs.emplace_back(A);
+  return commandLineFitsWithinSystemLimits(Program, StringRefArgs);
+}
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Program.inc"

Modified: llvm/trunk/lib/Support/Windows/Program.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Program.inc?rev=334353&r1=334352&r2=334353&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Program.inc (original)
+++ llvm/trunk/lib/Support/Windows/Program.inc Sat Jun  9 15:44:44 2018
@@ -23,6 +23,7 @@
 #include <fcntl.h>
 #include <io.h>
 #include <malloc.h>
+#include <numeric>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
@@ -146,108 +147,13 @@ static HANDLE RedirectIO(Optional<String
   return h;
 }
 
-/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess.
-static bool ArgNeedsQuotes(const char *Str) {
-  return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
-}
-
-/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur
-/// in the C string Start.
-static unsigned int CountPrecedingBackslashes(const char *Start,
-                                              const char *Cur) {
-  unsigned int Count = 0;
-  --Cur;
-  while (Cur >= Start && *Cur == '\\') {
-    ++Count;
-    --Cur;
-  }
-  return Count;
-}
-
-/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash
-/// preceding Cur in the Start string.  Assumes Dst has enough space.
-static char *EscapePrecedingEscapes(char *Dst, const char *Start,
-                                    const char *Cur) {
-  unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur);
-  while (PrecedingEscapes > 0) {
-    *Dst++ = '\\';
-    --PrecedingEscapes;
-  }
-  return Dst;
-}
-
-/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess and returns length of quoted arg with escaped quotes
-static unsigned int ArgLenWithQuotes(const char *Str) {
-  const char *Start = Str;
-  bool Quoted = ArgNeedsQuotes(Str);
-  unsigned int len = Quoted ? 2 : 0;
-
-  while (*Str != '\0') {
-    if (*Str == '\"') {
-      // We need to add a backslash, but ensure that it isn't escaped.
-      unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
-      len += PrecedingEscapes + 1;
-    }
-    // Note that we *don't* need to escape runs of backslashes that don't
-    // precede a double quote!  See MSDN:
-    // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx
-
-    ++len;
-    ++Str;
-  }
-
-  if (Quoted) {
-    // Make sure the closing quote doesn't get escaped by a trailing backslash.
-    unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
-    len += PrecedingEscapes + 1;
-  }
-
-  return len;
-}
-
 }
 
-static std::unique_ptr<char[]> flattenArgs(const char **Args) {
-  // First, determine the length of the command line.
-  unsigned len = 0;
-  for (unsigned i = 0; Args[i]; i++) {
-    len += ArgLenWithQuotes(Args[i]) + 1;
-  }
-
-  // Now build the command line.
-  std::unique_ptr<char[]> command(new char[len+1]);
-  char *p = command.get();
-
-  for (unsigned i = 0; Args[i]; i++) {
-    const char *arg = Args[i];
-    const char *start = arg;
-
-    bool needsQuoting = ArgNeedsQuotes(arg);
-    if (needsQuoting)
-      *p++ = '"';
-
-    while (*arg != '\0') {
-      if (*arg == '\"') {
-        // Escape all preceding escapes (if any), and then escape the quote.
-        p = EscapePrecedingEscapes(p, start, arg);
-        *p++ = '\\';
-      }
-
-      *p++ = *arg++;
-    }
-
-    if (needsQuoting) {
-      // Make sure our quote doesn't get escaped by a trailing backslash.
-      p = EscapePrecedingEscapes(p, start, arg);
-      *p++ = '"';
-    }
-    *p++ = ' ';
-  }
-
-  *p = 0;
-  return command;
+static SmallVector<StringRef, 8> buildArgVector(const char **Args) {
+  SmallVector<StringRef, 8> Result;
+  for (unsigned I = 0; Args[I]; ++I)
+    Result.push_back(StringRef(Args[I]));
+  return Result;
 }
 
 static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args,
@@ -270,7 +176,8 @@ static bool Execute(ProcessInfo &PI, Str
   // Windows wants a command line, not an array of args, to pass to the new
   // process.  We have to concatenate them all, while quoting the args that
   // have embedded spaces (or are empty).
-  std::unique_ptr<char[]> command = flattenArgs(Args);
+  auto ArgVector = buildArgVector(Args);
+  std::string Command = flattenWindowsCommandLine(ArgVector);
 
   // The pointer to the environment block for the new process.
   std::vector<wchar_t> EnvBlock;
@@ -353,7 +260,7 @@ static bool Execute(ProcessInfo &PI, Str
   }
 
   SmallVector<wchar_t, MAX_PATH> CommandUtf16;
-  if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
+  if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) {
     SetLastError(ec.value());
     MakeErrMsg(ErrMsg,
                std::string("Unable to convert command-line to UTF-16"));
@@ -414,7 +321,60 @@ static bool Execute(ProcessInfo &PI, Str
   return true;
 }
 
+static bool argNeedsQuotes(StringRef Arg) {
+  if (Arg.empty())
+    return true;
+  return StringRef::npos != Arg.find_first_of(" \t\n\v\"");
+}
+
+static std::string quoteSingleArg(StringRef Arg) {
+  std::string Result;
+  Result.push_back('"');
+
+  while (!Arg.empty()) {
+    size_t FirstNonBackslash = Arg.find_first_not_of('\\');
+    size_t BackslashCount = FirstNonBackslash;
+    if (FirstNonBackslash == StringRef::npos) {
+      // The entire remainder of the argument is backslashes.  Escape all of
+      // them and just early out.
+      BackslashCount = Arg.size();
+      Result.append(BackslashCount * 2, '\\');
+      break;
+    }
+
+    if (Arg[FirstNonBackslash] == '\"') {
+      // This is an embedded quote.  Escape all preceding backslashes, then
+      // add one additional backslash to escape the quote.
+      Result.append(BackslashCount * 2 + 1, '\\');
+      Result.push_back('\"');
+    } else {
+      // This is just a normal character.  Don't escape any of the preceding
+      // backslashes, just append them as they are and then append the
+      // character.
+      Result.append(BackslashCount, '\\');
+      Result.push_back(Arg[FirstNonBackslash]);
+    }
+
+    // Drop all the backslashes, plus the following character.
+    Arg = Arg.drop_front(FirstNonBackslash + 1);
+  }
+
+  Result.push_back('"');
+  return Result;
+}
+
 namespace llvm {
+std::string sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) {
+  std::string Command;
+  for (StringRef Arg : Args) {
+    Command += argNeedsQuotes(Arg) ? quoteSingleArg(Arg) : Arg;
+
+    Command.push_back(' ');
+  }
+
+  return Command;
+}
+
 ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
                       bool WaitUntilChildTerminates, std::string *ErrMsg) {
   assert(PI.Pid && "invalid pid to wait on, process not started?");
@@ -537,19 +497,13 @@ llvm::sys::writeFileWithEncoding(StringR
 }
 
 bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program,
-                                                  ArrayRef<const char *> Args) {
+                                                  ArrayRef<StringRef> Args) {
   // The documented max length of the command line passed to CreateProcess.
   static const size_t MaxCommandStringLength = 32768;
-  // Account for the trailing space for the program path and the
-  // trailing NULL of the last argument.
-  size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
-  for (const char* Arg : Args) {
-    // Account for the trailing space for every arg
-    ArgLength += ArgLenWithQuotes(Arg) + 1;
-    if (ArgLength > MaxCommandStringLength) {
-      return false;
-    }
-  }
-  return true;
+  SmallVector<StringRef, 8> FullArgs;
+  FullArgs.push_back(Program);
+  FullArgs.append(Args.begin(), Args.end());
+  std::string Result = flattenWindowsCommandLine(FullArgs);
+  return (Result.size() + 1) <= MaxCommandStringLength;
 }
 }




More information about the llvm-commits mailing list